#Importing all the libraries
from sklearn.tree import DecisionTreeRegressor
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.svm import SVR
from sklearn.model_selection import train_test_split,GridSearchCV
from sklearn.metrics import mean_squared_error, r2_score
import numpy as np
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
from sklearn.decomposition import PCA
import matplotlib.pyplot as plt
df = pd.read_csv('./disease-burden-by-risk-factor.csv')
# Define new column names
new_columns = {
'DALYs that are from all causes attributed to low physical activity, in both sexes aged all ages': 'low_physical_activity',
'DALYs that are from all causes attributed to non-exclusive breastfeeding, in both sexes aged all ages': 'non_exclusive_breastfeeding',
'DALYs that are from all causes attributed to air pollution, in both sexes aged all ages': 'air_pollution',
'DALYs that are from all causes attributed to child wasting, in both sexes aged all ages': 'child_wasting',
'DALYs that are from all causes attributed to high systolic blood pressure, in both sexes aged all ages': 'high_systolic_blood_pressure',
'DALYs that are from all causes attributed to high fasting plasma glucose, in both sexes aged all ages': 'high_fasting_plasma_glucose',
'DALYs that are from all causes attributed to child stunting, in both sexes aged all ages': 'child_stunting',
'DALYs that are from all causes attributed to high body-mass index, in both sexes aged all ages': 'high_body_mass_index',
'DALYs that are from all causes attributed to secondhand smoke, in both sexes aged all ages': 'secondhand_smoke',
'DALYs that are from all causes attributed to unsafe sanitation, in both sexes aged all ages': 'unsafe_sanitation',
'DALYs that are from all causes attributed to unsafe water source, in both sexes aged all ages': 'unsafe_water_source',
'DALYs that are from all causes attributed to diet low in vegetables, in both sexes aged all ages': 'diet_low_in_vegetables',
'DALYs that are from all causes attributed to diet low in fruits, in both sexes aged all ages': 'diet_low_in_fruits',
'DALYs that are from all causes attributed to diet high in sodium, in both sexes aged all ages': 'diet_high_in_sodium',
'DALYs that are from all causes attributed to drug use, in both sexes aged all ages': 'drug_use',
'DALYs that are from all causes attributed to household air pollution from solid fuels, in both sexes aged all ages': 'household_air_pollution',
'DALYs that are from all causes attributed to high ldl cholesterol, in both sexes aged all ages': 'high_ldl_cholesterol',
'DALYs that are from all causes attributed to iron deficiency, in both sexes aged all ages': 'iron_deficiency',
'DALYs that are from all causes attributed to zinc deficiency, in both sexes aged all ages': 'zinc_deficiency',
'DALYs that are from all causes attributed to smoking, in both sexes aged all ages': 'smoking',
'DALYs that are from all causes attributed to vitamin a deficiency, in both sexes aged all ages': 'vitamin_a_deficiency',
'DALYs that are from all causes attributed to ambient particulate matter pollution, in both sexes aged all ages': 'ambient_particulate_matter_pollution'
}
# Rename the columns
df.rename(columns=new_columns, inplace=True)
# Print the columns to verify
print(df.columns)
Index(['Entity', 'Code', 'Year', 'low_physical_activity',
'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
'ambient_particulate_matter_pollution'],
dtype='object')
df1 = pd.read_csv('./share-of-total-disease-burden-by-cause.csv')
# Define new column names
new_columns_df1 = {
'DALYs (Disability-Adjusted Life Years) - Self-harm - Sex: Both - Age: All Ages (Percent)': 'self_harm',
'DALYs (Disability-Adjusted Life Years) - Exposure to forces of nature - Sex: Both - Age: All Ages (Percent)': 'exposure_to_forces_of_nature',
'DALYs (Disability-Adjusted Life Years) - Conflict and terrorism - Sex: Both - Age: All Ages (Percent)': 'conflict_and_terrorism',
'DALYs (Disability-Adjusted Life Years) - Interpersonal violence - Sex: Both - Age: All Ages (Percent)': 'interpersonal_violence',
'DALYs (Disability-Adjusted Life Years) - Neglected tropical diseases and malaria - Sex: Both - Age: All Ages (Percent)': 'neglected_tropical_diseases_and_malaria',
'DALYs (Disability-Adjusted Life Years) - Substance use disorders - Sex: Both - Age: All Ages (Percent)': 'substance_use_disorders',
'DALYs (Disability-Adjusted Life Years) - Skin and subcutaneous diseases - Sex: Both - Age: All Ages (Percent)': 'skin_and_subcutaneous_diseases',
'DALYs (Disability-Adjusted Life Years) - Enteric infections - Sex: Both - Age: All Ages (Percent)': 'enteric_infections',
'DALYs (Disability-Adjusted Life Years) - Diabetes and kidney diseases - Sex: Both - Age: All Ages (Percent)': 'diabetes_and_kidney_diseases',
'DALYs (Disability-Adjusted Life Years) - Cardiovascular diseases - Sex: Both - Age: All Ages (Percent)': 'cardiovascular_diseases',
'DALYs (Disability-Adjusted Life Years) - Digestive diseases - Sex: Both - Age: All Ages (Percent)': 'digestive_diseases',
'DALYs (Disability-Adjusted Life Years) - Nutritional deficiencies - Sex: Both - Age: All Ages (Percent)': 'nutritional_deficiencies',
'DALYs (Disability-Adjusted Life Years) - Respiratory infections and tuberculosis - Sex: Both - Age: All Ages (Percent)': 'respiratory_infections_and_tuberculosis',
'DALYs (Disability-Adjusted Life Years) - Neonatal disorders - Sex: Both - Age: All Ages (Percent)': 'neonatal_disorders',
'DALYs (Disability-Adjusted Life Years) - Chronic respiratory diseases - Sex: Both - Age: All Ages (Percent)': 'chronic_respiratory_diseases',
'DALYs (Disability-Adjusted Life Years) - Other non-communicable diseases - Sex: Both - Age: All Ages (Percent)': 'other_non_communicable_diseases',
'DALYs (Disability-Adjusted Life Years) - Maternal disorders - Sex: Both - Age: All Ages (Percent)': 'maternal_disorders',
'DALYs (Disability-Adjusted Life Years) - Unintentional injuries - Sex: Both - Age: All Ages (Percent)': 'unintentional_injuries',
'DALYs (Disability-Adjusted Life Years) - Musculoskeletal disorders - Sex: Both - Age: All Ages (Percent)': 'musculoskeletal_disorders',
'DALYs (Disability-Adjusted Life Years) - Neoplasms - Sex: Both - Age: All Ages (Percent)': 'neoplasms',
'DALYs (Disability-Adjusted Life Years) - Mental disorders - Sex: Both - Age: All Ages (Percent)': 'mental_disorders',
'DALYs (Disability-Adjusted Life Years) - Neurological disorders - Sex: Both - Age: All Ages (Percent)': 'neurological_disorders',
'DALYs (Disability-Adjusted Life Years) - HIV/AIDS and sexually transmitted infections - Sex: Both - Age: All Ages (Percent)': 'HIV_and_sexually_transmitted_infections',
'DALYs (Disability-Adjusted Life Years) - Transport injuries - Sex: Both - Age: All Ages (Percent)': 'transport_injuries',
'DALYs (Disability-Adjusted Life Years) - Sense organ diseases - Sex: Both - Age: All Ages (Percent)': 'sense_organ_diseases'
}
# Rename the columns
df1.rename(columns=new_columns_df1, inplace=True)
# Print the columns to verify
print(df1.columns)
# Print the DataFrame
df1
Index(['Entity', 'Code', 'Year', 'self_harm', 'exposure_to_forces_of_nature',
'conflict_and_terrorism', 'interpersonal_violence',
'neglected_tropical_diseases_and_malaria', 'substance_use_disorders',
'skin_and_subcutaneous_diseases', 'enteric_infections',
'diabetes_and_kidney_diseases', 'cardiovascular_diseases',
'digestive_diseases', 'nutritional_deficiencies',
'respiratory_infections_and_tuberculosis', 'neonatal_disorders',
'chronic_respiratory_diseases', 'other_non_communicable_diseases',
'maternal_disorders', 'unintentional_injuries',
'musculoskeletal_disorders', 'neoplasms', 'mental_disorders',
'neurological_disorders', 'HIV_and_sexually_transmitted_infections',
'transport_injuries', 'sense_organ_diseases'],
dtype='object')
| Entity | Code | Year | self_harm | exposure_to_forces_of_nature | conflict_and_terrorism | interpersonal_violence | neglected_tropical_diseases_and_malaria | substance_use_disorders | skin_and_subcutaneous_diseases | ... | other_non_communicable_diseases | maternal_disorders | unintentional_injuries | musculoskeletal_disorders | neoplasms | mental_disorders | neurological_disorders | HIV_and_sexually_transmitted_infections | transport_injuries | sense_organ_diseases | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 0.31 | 0.01 | 3.69 | 0.83 | 1.12 | 0.19 | 0.42 | ... | 11.94 | 1.35 | 3.81 | 0.94 | 3.27 | 1.70 | 1.28 | 0.12 | 2.38 | 0.65 |
| 1 | Afghanistan | AFG | 1991 | 0.31 | 0.66 | 4.60 | 1.01 | 1.14 | 0.19 | 0.42 | ... | 11.87 | 1.38 | 4.35 | 0.92 | 3.13 | 1.73 | 1.27 | 0.12 | 2.35 | 0.63 |
| 2 | Afghanistan | AFG | 1992 | 0.33 | 0.29 | 4.94 | 1.06 | 1.10 | 0.20 | 0.44 | ... | 12.27 | 1.44 | 3.94 | 0.90 | 2.98 | 1.79 | 1.27 | 0.15 | 2.42 | 0.60 |
| 3 | Afghanistan | AFG | 1993 | 0.33 | 0.11 | 4.55 | 1.09 | 0.96 | 0.21 | 0.43 | ... | 12.50 | 1.45 | 3.80 | 0.87 | 2.84 | 1.78 | 1.24 | 0.17 | 2.48 | 0.57 |
| 4 | Afghanistan | AFG | 1994 | 0.32 | 0.08 | 6.23 | 1.10 | 0.92 | 0.20 | 0.42 | ... | 12.18 | 1.41 | 3.71 | 0.82 | 2.70 | 1.71 | 1.20 | 0.17 | 2.41 | 0.54 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6835 | Zimbabwe | ZWE | 2015 | 1.36 | 0.01 | 0.02 | 1.09 | 3.23 | 0.58 | 0.85 | ... | 4.04 | 1.10 | 2.85 | 1.64 | 4.43 | 2.19 | 1.75 | 22.39 | 1.97 | 0.98 |
| 6836 | Zimbabwe | ZWE | 2016 | 1.43 | 0.03 | 0.01 | 1.14 | 2.78 | 0.60 | 0.88 | ... | 4.18 | 1.10 | 3.02 | 1.72 | 4.65 | 2.28 | 1.82 | 21.17 | 2.07 | 1.02 |
| 6837 | Zimbabwe | ZWE | 2017 | 1.48 | 0.22 | 0.01 | 1.17 | 2.85 | 0.63 | 0.91 | ... | 4.29 | 1.10 | 3.31 | 1.80 | 4.84 | 2.36 | 1.88 | 19.77 | 2.16 | 1.05 |
| 6838 | Zimbabwe | ZWE | 2018 | 1.54 | 0.00 | 0.01 | 1.23 | 2.86 | 0.66 | 0.94 | ... | 4.44 | 1.11 | 3.19 | 1.90 | 5.08 | 2.47 | 1.97 | 18.05 | 2.26 | 1.10 |
| 6839 | Zimbabwe | ZWE | 2019 | 1.57 | 0.58 | 0.01 | 1.26 | 2.82 | 0.69 | 0.96 | ... | 4.51 | 1.11 | 3.86 | 1.97 | 5.23 | 2.53 | 2.01 | 16.96 | 2.23 | 1.13 |
6840 rows × 28 columns
import seaborn as sns
corr_matrix = df1.drop(['Year','Entity'],axis =1).corr()
plt.figure(figsize=(18, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Correlation Matrix of Disease Burden by Sub-Category')
plt.show()
/var/folders/tc/t9c_fvpn08jbwz38bdj660vm0000gn/T/ipykernel_16363/1473648444.py:2: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. corr_matrix = df1.drop(['Year','Entity'],axis =1).corr()
target_column = 'mental_disorders'
related_correlations = corr_matrix[target_column]
# Create a list of columns with correlation >= 0.5
high_correlation_columns = related_correlations[related_correlations >= 0.5].index.tolist()
# Remove the target column itself from the list
high_correlation_columns.remove(target_column)
# Print the result
print("Columns with correlation >= 0.5 to '{}':".format(target_column))
print(high_correlation_columns)
Columns with correlation >= 0.5 to 'mental_disorders': ['substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders', 'sense_organ_diseases']
def convert_to_actual_dalys(percent_str, total):
# Convert percent_str to float
percent = float(percent_str)
# Perform the calculation
return (percent / 100) * total
total_population = 100000
for col in df1.columns:
# Exclude 'Entity', 'Code', and 'Year' columns
if col not in ['Entity', 'Code', 'Year']:
actual_col_name = col
df1[actual_col_name] = df1[col].apply(convert_to_actual_dalys, total=total_population)
df1
| Entity | Code | Year | self_harm | exposure_to_forces_of_nature | conflict_and_terrorism | interpersonal_violence | neglected_tropical_diseases_and_malaria | substance_use_disorders | skin_and_subcutaneous_diseases | ... | other_non_communicable_diseases | maternal_disorders | unintentional_injuries | musculoskeletal_disorders | neoplasms | mental_disorders | neurological_disorders | HIV_and_sexually_transmitted_infections | transport_injuries | sense_organ_diseases | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 310.0 | 10.0 | 3690.0 | 830.0 | 1120.0 | 190.0 | 420.0 | ... | 11940.0 | 1350.0 | 3810.0 | 940.0 | 3270.0 | 1700.0 | 1280.0 | 120.0 | 2380.0 | 650.0 |
| 1 | Afghanistan | AFG | 1991 | 310.0 | 660.0 | 4600.0 | 1010.0 | 1140.0 | 190.0 | 420.0 | ... | 11870.0 | 1380.0 | 4350.0 | 920.0 | 3130.0 | 1730.0 | 1270.0 | 120.0 | 2350.0 | 630.0 |
| 2 | Afghanistan | AFG | 1992 | 330.0 | 290.0 | 4940.0 | 1060.0 | 1100.0 | 200.0 | 440.0 | ... | 12270.0 | 1440.0 | 3940.0 | 900.0 | 2980.0 | 1790.0 | 1270.0 | 150.0 | 2420.0 | 600.0 |
| 3 | Afghanistan | AFG | 1993 | 330.0 | 110.0 | 4550.0 | 1090.0 | 960.0 | 210.0 | 430.0 | ... | 12500.0 | 1450.0 | 3800.0 | 870.0 | 2840.0 | 1780.0 | 1240.0 | 170.0 | 2480.0 | 570.0 |
| 4 | Afghanistan | AFG | 1994 | 320.0 | 80.0 | 6230.0 | 1100.0 | 920.0 | 200.0 | 420.0 | ... | 12180.0 | 1410.0 | 3710.0 | 820.0 | 2700.0 | 1710.0 | 1200.0 | 170.0 | 2410.0 | 540.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6835 | Zimbabwe | ZWE | 2015 | 1360.0 | 10.0 | 20.0 | 1090.0 | 3230.0 | 580.0 | 850.0 | ... | 4040.0 | 1100.0 | 2850.0 | 1640.0 | 4430.0 | 2190.0 | 1750.0 | 22390.0 | 1970.0 | 980.0 |
| 6836 | Zimbabwe | ZWE | 2016 | 1430.0 | 30.0 | 10.0 | 1140.0 | 2780.0 | 600.0 | 880.0 | ... | 4180.0 | 1100.0 | 3020.0 | 1720.0 | 4650.0 | 2280.0 | 1820.0 | 21170.0 | 2070.0 | 1020.0 |
| 6837 | Zimbabwe | ZWE | 2017 | 1480.0 | 220.0 | 10.0 | 1170.0 | 2850.0 | 630.0 | 910.0 | ... | 4290.0 | 1100.0 | 3310.0 | 1800.0 | 4840.0 | 2360.0 | 1880.0 | 19770.0 | 2160.0 | 1050.0 |
| 6838 | Zimbabwe | ZWE | 2018 | 1540.0 | 0.0 | 10.0 | 1230.0 | 2860.0 | 660.0 | 940.0 | ... | 4440.0 | 1110.0 | 3190.0 | 1900.0 | 5080.0 | 2470.0 | 1970.0 | 18050.0 | 2260.0 | 1100.0 |
| 6839 | Zimbabwe | ZWE | 2019 | 1570.0 | 580.0 | 10.0 | 1260.0 | 2820.0 | 690.0 | 960.0 | ... | 4510.0 | 1110.0 | 3860.0 | 1970.0 | 5230.0 | 2530.0 | 2010.0 | 16960.0 | 2230.0 | 1130.0 |
6840 rows × 28 columns
df1
| Entity | Code | Year | self_harm | exposure_to_forces_of_nature | conflict_and_terrorism | interpersonal_violence | neglected_tropical_diseases_and_malaria | substance_use_disorders | skin_and_subcutaneous_diseases | ... | other_non_communicable_diseases | maternal_disorders | unintentional_injuries | musculoskeletal_disorders | neoplasms | mental_disorders | neurological_disorders | HIV_and_sexually_transmitted_infections | transport_injuries | sense_organ_diseases | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 310.0 | 10.0 | 3690.0 | 830.0 | 1120.0 | 190.0 | 420.0 | ... | 11940.0 | 1350.0 | 3810.0 | 940.0 | 3270.0 | 1700.0 | 1280.0 | 120.0 | 2380.0 | 650.0 |
| 1 | Afghanistan | AFG | 1991 | 310.0 | 660.0 | 4600.0 | 1010.0 | 1140.0 | 190.0 | 420.0 | ... | 11870.0 | 1380.0 | 4350.0 | 920.0 | 3130.0 | 1730.0 | 1270.0 | 120.0 | 2350.0 | 630.0 |
| 2 | Afghanistan | AFG | 1992 | 330.0 | 290.0 | 4940.0 | 1060.0 | 1100.0 | 200.0 | 440.0 | ... | 12270.0 | 1440.0 | 3940.0 | 900.0 | 2980.0 | 1790.0 | 1270.0 | 150.0 | 2420.0 | 600.0 |
| 3 | Afghanistan | AFG | 1993 | 330.0 | 110.0 | 4550.0 | 1090.0 | 960.0 | 210.0 | 430.0 | ... | 12500.0 | 1450.0 | 3800.0 | 870.0 | 2840.0 | 1780.0 | 1240.0 | 170.0 | 2480.0 | 570.0 |
| 4 | Afghanistan | AFG | 1994 | 320.0 | 80.0 | 6230.0 | 1100.0 | 920.0 | 200.0 | 420.0 | ... | 12180.0 | 1410.0 | 3710.0 | 820.0 | 2700.0 | 1710.0 | 1200.0 | 170.0 | 2410.0 | 540.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6835 | Zimbabwe | ZWE | 2015 | 1360.0 | 10.0 | 20.0 | 1090.0 | 3230.0 | 580.0 | 850.0 | ... | 4040.0 | 1100.0 | 2850.0 | 1640.0 | 4430.0 | 2190.0 | 1750.0 | 22390.0 | 1970.0 | 980.0 |
| 6836 | Zimbabwe | ZWE | 2016 | 1430.0 | 30.0 | 10.0 | 1140.0 | 2780.0 | 600.0 | 880.0 | ... | 4180.0 | 1100.0 | 3020.0 | 1720.0 | 4650.0 | 2280.0 | 1820.0 | 21170.0 | 2070.0 | 1020.0 |
| 6837 | Zimbabwe | ZWE | 2017 | 1480.0 | 220.0 | 10.0 | 1170.0 | 2850.0 | 630.0 | 910.0 | ... | 4290.0 | 1100.0 | 3310.0 | 1800.0 | 4840.0 | 2360.0 | 1880.0 | 19770.0 | 2160.0 | 1050.0 |
| 6838 | Zimbabwe | ZWE | 2018 | 1540.0 | 0.0 | 10.0 | 1230.0 | 2860.0 | 660.0 | 940.0 | ... | 4440.0 | 1110.0 | 3190.0 | 1900.0 | 5080.0 | 2470.0 | 1970.0 | 18050.0 | 2260.0 | 1100.0 |
| 6839 | Zimbabwe | ZWE | 2019 | 1570.0 | 580.0 | 10.0 | 1260.0 | 2820.0 | 690.0 | 960.0 | ... | 4510.0 | 1110.0 | 3860.0 | 1970.0 | 5230.0 | 2530.0 | 2010.0 | 16960.0 | 2230.0 | 1130.0 |
6840 rows × 28 columns
merged_df = pd.merge(df, df1[['Entity','Year','substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders', 'sense_organ_diseases','mental_disorders']], on=['Year', 'Entity'])
# Display the merged DataFrame
merged_df
print(merged_df.columns)
Index(['Entity', 'Code', 'Year', 'low_physical_activity',
'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
'ambient_particulate_matter_pollution', 'substance_use_disorders',
'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders',
'neoplasms', 'neurological_disorders', 'sense_organ_diseases',
'mental_disorders'],
dtype='object')
merged_df
| Entity | Code | Year | low_physical_activity | non_exclusive_breastfeeding | air_pollution | child_wasting | high_systolic_blood_pressure | high_fasting_plasma_glucose | child_stunting | ... | smoking | vitamin_a_deficiency | ambient_particulate_matter_pollution | substance_use_disorders | skin_and_subcutaneous_diseases | musculoskeletal_disorders | neoplasms | neurological_disorders | sense_organ_diseases | mental_disorders | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 61720.0600 | 197049.340 | 1986290.40 | 1708694.40 | 663575.50 | 310177.80 | 670056.500 | ... | 146352.77 | 184149.1000 | 143037.550 | 190.0 | 420.0 | 940.0 | 3270.0 | 1280.0 | 650.0 | 1700.0 |
| 1 | Afghanistan | AFG | 1991 | 62191.6000 | 222485.780 | 2069430.00 | 1779057.60 | 670934.56 | 320839.94 | 687930.700 | ... | 148548.45 | 188899.7700 | 148112.800 | 190.0 | 420.0 | 920.0 | 3130.0 | 1270.0 | 630.0 | 1730.0 |
| 2 | Afghanistan | AFG | 1992 | 63325.2340 | 271585.200 | 2298508.20 | 2005481.50 | 685869.90 | 335451.56 | 748159.940 | ... | 152365.47 | 194698.0500 | 163113.840 | 200.0 | 440.0 | 900.0 | 2980.0 | 1270.0 | 600.0 | 1790.0 |
| 3 | Afghanistan | AFG | 1993 | 64873.6250 | 331279.280 | 2555748.20 | 2366581.00 | 705695.94 | 351943.25 | 862717.750 | ... | 157201.58 | 214913.9000 | 180597.550 | 210.0 | 430.0 | 870.0 | 2840.0 | 1240.0 | 570.0 | 1780.0 |
| 4 | Afghanistan | AFG | 1994 | 66452.0700 | 340745.120 | 2707120.00 | 2559192.20 | 725500.00 | 367981.28 | 963305.500 | ... | 162079.20 | 246066.0500 | 190782.720 | 200.0 | 420.0 | 820.0 | 2700.0 | 1200.0 | 540.0 | 1710.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6715 | Zimbabwe | ZWE | 2015 | 7196.8833 | 65671.530 | 653410.94 | 494563.78 | 288229.12 | 261661.61 | 63780.020 | ... | 254297.17 | 19442.2290 | 132720.830 | 580.0 | 850.0 | 1640.0 | 4430.0 | 1750.0 | 980.0 | 2190.0 |
| 6716 | Zimbabwe | ZWE | 2016 | 7424.6733 | 62675.110 | 641920.20 | 489271.53 | 293492.00 | 267324.88 | 58934.336 | ... | 257688.56 | 15954.1960 | 128623.750 | 600.0 | 880.0 | 1720.0 | 4650.0 | 1820.0 | 1020.0 | 2280.0 |
| 6717 | Zimbabwe | ZWE | 2017 | 7655.2163 | 60760.780 | 625386.40 | 472972.25 | 297975.16 | 271637.40 | 55018.605 | ... | 260584.25 | 14143.2295 | 122945.070 | 630.0 | 910.0 | 1800.0 | 4840.0 | 1880.0 | 1050.0 | 2360.0 |
| 6718 | Zimbabwe | ZWE | 2018 | 7862.5596 | 57715.004 | 609545.30 | 453153.88 | 303253.00 | 276985.75 | 53059.625 | ... | 263968.06 | 14170.8080 | 120112.450 | 660.0 | 940.0 | 1900.0 | 5080.0 | 1970.0 | 1100.0 | 2470.0 |
| 6719 | Zimbabwe | ZWE | 2019 | 8126.4062 | 56559.246 | 600330.00 | 442257.94 | 309965.70 | 283899.03 | 51181.336 | ... | 268873.30 | 13749.4900 | 119444.164 | 690.0 | 960.0 | 1970.0 | 5230.0 | 2010.0 | 1130.0 | 2530.0 |
6720 rows × 32 columns
corr_matrix = merged_df.drop(['Year','Entity'],axis =1).corr()
target_column = 'mental_disorders'
related_correlations = corr_matrix[target_column]
/var/folders/tc/t9c_fvpn08jbwz38bdj660vm0000gn/T/ipykernel_16363/3563348648.py:1: FutureWarning: The default value of numeric_only in DataFrame.corr is deprecated. In a future version, it will default to False. Select only valid columns or specify the value of numeric_only to silence this warning. corr_matrix = merged_df.drop(['Year','Entity'],axis =1).corr()
plt.figure(figsize=(25, 8))
sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', linewidths=.5)
plt.title('Correlation Matrix of Disease Burden by Sub-Category')
plt.show()
missing_values = merged_df.isnull().sum()
print("Missing Values:\n", missing_values)
Missing Values: Entity 0 Code 570 Year 0 low_physical_activity 0 non_exclusive_breastfeeding 0 air_pollution 0 child_wasting 0 high_systolic_blood_pressure 0 high_fasting_plasma_glucose 0 child_stunting 0 high_body_mass_index 0 secondhand_smoke 0 unsafe_sanitation 0 unsafe_water_source 0 diet_low_in_vegetables 0 diet_low_in_fruits 0 diet_high_in_sodium 0 drug_use 0 household_air_pollution 0 high_ldl_cholesterol 0 iron_deficiency 0 zinc_deficiency 0 smoking 0 vitamin_a_deficiency 0 ambient_particulate_matter_pollution 0 substance_use_disorders 0 skin_and_subcutaneous_diseases 0 musculoskeletal_disorders 0 neoplasms 0 neurological_disorders 0 sense_organ_diseases 0 mental_disorders 0 dtype: int64
print("\nData Types:\n", merged_df.dtypes)
Data Types: Entity object Code object Year int64 low_physical_activity float64 non_exclusive_breastfeeding float64 air_pollution float64 child_wasting float64 high_systolic_blood_pressure float64 high_fasting_plasma_glucose float64 child_stunting float64 high_body_mass_index float64 secondhand_smoke float64 unsafe_sanitation float64 unsafe_water_source float64 diet_low_in_vegetables float64 diet_low_in_fruits float64 diet_high_in_sodium float64 drug_use float64 household_air_pollution float64 high_ldl_cholesterol float64 iron_deficiency float64 zinc_deficiency float64 smoking float64 vitamin_a_deficiency float64 ambient_particulate_matter_pollution float64 substance_use_disorders float64 skin_and_subcutaneous_diseases float64 musculoskeletal_disorders float64 neoplasms float64 neurological_disorders float64 sense_organ_diseases float64 mental_disorders float64 dtype: object
print("\nSummary Statistics:\n", merged_df.describe())
Summary Statistics:
Year low_physical_activity non_exclusive_breastfeeding \
count 6720.000000 6.720000e+03 6.720000e+03
mean 2004.500000 2.616060e+05 5.284880e+05
std 8.656086 1.093153e+06 2.553256e+06
min 1990.000000 5.962806e+00 2.864449e-01
25% 1997.000000 1.955261e+03 3.224378e+02
50% 2004.500000 1.002872e+04 4.687922e+03
75% 2012.000000 4.800914e+04 1.070237e+05
max 2019.000000 1.574794e+07 4.496013e+07
air_pollution child_wasting high_systolic_blood_pressure \
count 6.720000e+03 6.720000e+03 6.720000e+03
mean 5.048454e+06 3.524774e+06 4.188101e+06
std 2.223505e+07 1.735942e+07 1.794726e+07
min 5.418394e+00 4.202705e+00 5.689009e+01
25% 2.355536e+04 2.768432e+03 4.207587e+04
50% 1.545772e+05 3.834691e+04 1.867063e+05
75% 1.013556e+06 6.910512e+05 8.235250e+05
max 2.807178e+08 2.951882e+08 2.354246e+08
high_fasting_plasma_glucose child_stunting high_body_mass_index \
count 6.720000e+03 6.720000e+03 6.720000e+03
mean 2.606789e+06 8.029631e+05 2.378053e+06
std 1.120238e+07 4.142251e+06 1.010609e+07
min 6.466809e+01 3.410204e-02 5.684043e+01
25% 3.012893e+04 7.211207e+01 3.080627e+04
50% 1.247997e+05 3.192497e+03 1.163628e+05
75% 4.976155e+05 1.234826e+05 4.966658e+05
max 1.720687e+08 7.249934e+07 1.602654e+08
secondhand_smoke ... smoking vitamin_a_deficiency \
count 6.720000e+03 ... 6.720000e+03 6.720000e+03
mean 8.362629e+05 ... 4.111163e+06 2.111740e+05
std 3.701606e+06 ... 1.790273e+07 1.145686e+06
min 1.433595e+01 ... 3.961353e+01 2.169985e-02
25% 6.923609e+03 ... 2.649569e+04 3.938570e+01
50% 3.332242e+04 ... 1.370987e+05 6.973932e+02
75% 1.384867e+05 ... 5.876447e+05 2.734946e+04
max 4.668201e+07 ... 1.997948e+08 2.012786e+07
ambient_particulate_matter_pollution substance_use_disorders \
count 6.720000e+03 6720.000000
mean 1.969151e+06 1280.479167
std 8.765911e+06 1023.246212
min 5.147841e+00 40.000000
25% 1.332260e+04 470.000000
50% 6.885797e+04 1050.000000
75% 2.995293e+05 1732.500000
max 1.182154e+08 6660.000000
skin_and_subcutaneous_diseases musculoskeletal_disorders \
count 6720.000000 6720.000000
mean 1623.492560 5009.494048
std 712.395234 3125.986089
min 80.000000 90.000000
25% 1050.000000 2310.000000
50% 1610.000000 4775.000000
75% 2150.000000 7000.000000
max 3970.000000 14510.000000
neoplasms neurological_disorders sense_organ_diseases \
count 6720.000000 6720.000000 6720.000000
mean 9531.322917 3580.559524 1845.034226
std 6177.814757 1723.992479 795.824869
min 360.000000 120.000000 70.000000
25% 4150.000000 2110.000000 1190.000000
50% 8140.000000 3640.000000 1950.000000
75% 14900.000000 4780.000000 2390.000000
max 31660.000000 8690.000000 4310.000000
mental_disorders
count 6720.000000
mean 4828.456845
std 2299.398664
min 220.000000
25% 3020.000000
50% 4680.000000
75% 6392.500000
max 13760.000000
[8 rows x 30 columns]
countries = merged_df['Entity'].unique()
countries.shape
(224,)
countries
array(['Afghanistan', 'African Region (WHO)', 'Albania', 'Algeria',
'American Samoa', 'Andorra', 'Angola', 'Antigua and Barbuda',
'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan',
'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus',
'Belgium', 'Belize', 'Benin', 'Bermuda', 'Bhutan', 'Bolivia',
'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei',
'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon',
'Canada', 'Cape Verde', 'Central African Republic', 'Chad',
'Chile', 'China', 'Colombia', 'Comoros', 'Congo', 'Cook Islands',
'Costa Rica', "Cote d'Ivoire", 'Croatia', 'Cuba', 'Cyprus',
'Czechia', 'Democratic Republic of Congo', 'Denmark', 'Djibouti',
'Dominica', 'Dominican Republic', 'East Asia & Pacific (WB)',
'East Timor', 'Eastern Mediterranean Region (WHO)', 'Ecuador',
'Egypt', 'El Salvador', 'England', 'Equatorial Guinea', 'Eritrea',
'Estonia', 'Eswatini', 'Ethiopia', 'Europe & Central Asia (WB)',
'European Region (WHO)', 'Fiji', 'Finland', 'France', 'G20',
'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece',
'Greenland', 'Grenada', 'Guam', 'Guatemala', 'Guinea',
'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary',
'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland',
'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan',
'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos',
'Latin America & Caribbean (WB)', 'Latvia', 'Lebanon', 'Lesotho',
'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Madagascar',
'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta',
'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico',
'Micronesia (country)', 'Middle East & North Africa (WB)',
'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco',
'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal',
'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria',
'Niue', 'North America (WB)', 'North Korea', 'North Macedonia',
'Northern Ireland', 'Northern Mariana Islands', 'Norway',
'OECD Countries', 'Oman', 'Pakistan', 'Palau', 'Palestine',
'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines',
'Poland', 'Portugal', 'Puerto Rico', 'Qatar',
'Region of the Americas (WHO)', 'Romania', 'Russia', 'Rwanda',
'Saint Kitts and Nevis', 'Saint Lucia',
'Saint Vincent and the Grenadines', 'Samoa', 'San Marino',
'Sao Tome and Principe', 'Saudi Arabia', 'Scotland', 'Senegal',
'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia',
'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa',
'South Asia (WB)', 'South Korea', 'South Sudan',
'South-East Asia Region (WHO)', 'Spain', 'Sri Lanka',
'Sub-Saharan Africa (WB)', 'Sudan', 'Suriname', 'Sweden',
'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania',
'Thailand', 'Togo', 'Tokelau', 'Tonga', 'Trinidad and Tobago',
'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine',
'United Arab Emirates', 'United Kingdom', 'United States',
'United States Virgin Islands', 'Uruguay', 'Uzbekistan', 'Vanuatu',
'Venezuela', 'Vietnam', 'Wales', 'Western Pacific Region (WHO)',
'World', 'Yemen', 'Zambia', 'Zimbabwe'], dtype=object)
import numpy as np
# List of 195 recognized countries
recognized_countries = [
"Afghanistan", "Albania", "Algeria", "Andorra", "Angola", "Antigua and Barbuda", "Argentina", "Armenia",
"Australia", "Austria", "Azerbaijan", "Bahamas", "Bahrain", "Bangladesh", "Barbados", "Belarus", "Belgium",
"Belize", "Benin", "Bhutan", "Bolivia", "Bosnia and Herzegovina", "Botswana", "Brazil", "Brunei", "Bulgaria",
"Burkina Faso", "Burundi", "Cabo Verde", "Cambodia", "Cameroon", "Canada", "Central African Republic", "Chad",
"Chile", "China", "Colombia", "Comoros", "Congo, Democratic Republic of the", "Congo, Republic of the",
"Costa Rica", "Croatia", "Cuba", "Cyprus", "Czech Republic", "Denmark", "Djibouti", "Dominica", "Dominican Republic",
"East Timor (Timor-Leste)", "Ecuador", "Egypt", "El Salvador", "Equatorial Guinea", "Eritrea", "Estonia", "Eswatini",
"Ethiopia", "Fiji", "Finland", "France", "Gabon", "Gambia", "Georgia", "Germany", "Ghana", "Greece", "Grenada",
"Guatemala", "Guinea", "Guinea-Bissau", "Guyana", "Haiti", "Honduras", "Hungary", "Iceland", "India", "Indonesia",
"Iran", "Iraq", "Ireland", "Israel", "Italy", "Jamaica", "Japan", "Jordan", "Kazakhstan", "Kenya", "Kiribati",
"Korea, North", "Korea, South", "Kosovo", "Kuwait", "Kyrgyzstan", "Laos", "Latvia", "Lebanon", "Lesotho", "Liberia",
"Libya", "Liechtenstein", "Lithuania", "Luxembourg", "Madagascar", "Malawi", "Malaysia", "Maldives", "Mali",
"Malta", "Marshall Islands", "Mauritania", "Mauritius", "Mexico", "Micronesia", "Moldova", "Monaco", "Mongolia",
"Montenegro", "Morocco", "Mozambique", "Myanmar", "Namibia", "Nauru", "Nepal", "Netherlands", "New Zealand",
"Nicaragua", "Niger", "Nigeria", "North Macedonia", "Norway", "Oman", "Pakistan", "Palau", "Palestine", "Panama",
"Papua New Guinea", "Paraguay", "Peru", "Philippines", "Poland", "Portugal", "Qatar", "Romania", "Russia", "Rwanda",
"Saint Kitts and Nevis", "Saint Lucia", "Saint Vincent and the Grenadines", "Samoa", "San Marino", "Sao Tome and Principe",
"Saudi Arabia", "Senegal", "Serbia", "Seychelles", "Sierra Leone", "Singapore", "Slovakia", "Slovenia", "Solomon Islands",
"Somalia", "South Africa", "South Sudan", "Spain", "Sri Lanka", "Sudan", "Suriname", "Sweden", "Switzerland", "Syria",
"Taiwan", "Tajikistan", "Tanzania", "Thailand", "Togo", "Tonga", "Trinidad and Tobago", "Tunisia", "Turkey", "Turkmenistan",
"Tuvalu", "Uganda", "Ukraine", "United Arab Emirates", "United Kingdom", "United States", "Uruguay", "Uzbekistan", "Vanuatu",
"Vatican City (Holy See)", "Venezuela", "Vietnam", "Yemen", "Zambia", "Zimbabwe"
]
# Filter out non-country entities
filtered_entities = [country for country in countries if country in recognized_countries]
# Print the filtered list
print(filtered_entities)
['Afghanistan', 'Albania', 'Algeria', 'Andorra', 'Angola', 'Antigua and Barbuda', 'Argentina', 'Armenia', 'Australia', 'Austria', 'Azerbaijan', 'Bahamas', 'Bahrain', 'Bangladesh', 'Barbados', 'Belarus', 'Belgium', 'Belize', 'Benin', 'Bhutan', 'Bolivia', 'Bosnia and Herzegovina', 'Botswana', 'Brazil', 'Brunei', 'Bulgaria', 'Burkina Faso', 'Burundi', 'Cambodia', 'Cameroon', 'Canada', 'Central African Republic', 'Chad', 'Chile', 'China', 'Colombia', 'Comoros', 'Costa Rica', 'Croatia', 'Cuba', 'Cyprus', 'Denmark', 'Djibouti', 'Dominica', 'Dominican Republic', 'Ecuador', 'Egypt', 'El Salvador', 'Equatorial Guinea', 'Eritrea', 'Estonia', 'Eswatini', 'Ethiopia', 'Fiji', 'Finland', 'France', 'Gabon', 'Gambia', 'Georgia', 'Germany', 'Ghana', 'Greece', 'Grenada', 'Guatemala', 'Guinea', 'Guinea-Bissau', 'Guyana', 'Haiti', 'Honduras', 'Hungary', 'Iceland', 'India', 'Indonesia', 'Iran', 'Iraq', 'Ireland', 'Israel', 'Italy', 'Jamaica', 'Japan', 'Jordan', 'Kazakhstan', 'Kenya', 'Kiribati', 'Kuwait', 'Kyrgyzstan', 'Laos', 'Latvia', 'Lebanon', 'Lesotho', 'Liberia', 'Libya', 'Lithuania', 'Luxembourg', 'Madagascar', 'Malawi', 'Malaysia', 'Maldives', 'Mali', 'Malta', 'Marshall Islands', 'Mauritania', 'Mauritius', 'Mexico', 'Moldova', 'Monaco', 'Mongolia', 'Montenegro', 'Morocco', 'Mozambique', 'Myanmar', 'Namibia', 'Nauru', 'Nepal', 'Netherlands', 'New Zealand', 'Nicaragua', 'Niger', 'Nigeria', 'North Macedonia', 'Norway', 'Oman', 'Pakistan', 'Palau', 'Palestine', 'Panama', 'Papua New Guinea', 'Paraguay', 'Peru', 'Philippines', 'Poland', 'Portugal', 'Qatar', 'Romania', 'Russia', 'Rwanda', 'Saint Kitts and Nevis', 'Saint Lucia', 'Saint Vincent and the Grenadines', 'Samoa', 'San Marino', 'Sao Tome and Principe', 'Saudi Arabia', 'Senegal', 'Serbia', 'Seychelles', 'Sierra Leone', 'Singapore', 'Slovakia', 'Slovenia', 'Solomon Islands', 'Somalia', 'South Africa', 'South Sudan', 'Spain', 'Sri Lanka', 'Sudan', 'Suriname', 'Sweden', 'Switzerland', 'Syria', 'Taiwan', 'Tajikistan', 'Tanzania', 'Thailand', 'Togo', 'Tonga', 'Trinidad and Tobago', 'Tunisia', 'Turkey', 'Turkmenistan', 'Tuvalu', 'Uganda', 'Ukraine', 'United Arab Emirates', 'United Kingdom', 'United States', 'Uruguay', 'Uzbekistan', 'Vanuatu', 'Venezuela', 'Vietnam', 'Yemen', 'Zambia', 'Zimbabwe']
len(filtered_entities)
185
recognized_countries_set = set(filtered_entities)
# Filter the dataframe to include only rows where the 'entity' column is in the recognized countries set
merged_df = merged_df[merged_df['Entity'].isin(recognized_countries_set)]
# Display the filtered dataframe
merged_df
| Entity | Code | Year | low_physical_activity | non_exclusive_breastfeeding | air_pollution | child_wasting | high_systolic_blood_pressure | high_fasting_plasma_glucose | child_stunting | ... | smoking | vitamin_a_deficiency | ambient_particulate_matter_pollution | substance_use_disorders | skin_and_subcutaneous_diseases | musculoskeletal_disorders | neoplasms | neurological_disorders | sense_organ_diseases | mental_disorders | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 61720.0600 | 197049.340 | 1986290.40 | 1708694.40 | 663575.50 | 310177.80 | 670056.500 | ... | 146352.77 | 184149.1000 | 143037.550 | 190.0 | 420.0 | 940.0 | 3270.0 | 1280.0 | 650.0 | 1700.0 |
| 1 | Afghanistan | AFG | 1991 | 62191.6000 | 222485.780 | 2069430.00 | 1779057.60 | 670934.56 | 320839.94 | 687930.700 | ... | 148548.45 | 188899.7700 | 148112.800 | 190.0 | 420.0 | 920.0 | 3130.0 | 1270.0 | 630.0 | 1730.0 |
| 2 | Afghanistan | AFG | 1992 | 63325.2340 | 271585.200 | 2298508.20 | 2005481.50 | 685869.90 | 335451.56 | 748159.940 | ... | 152365.47 | 194698.0500 | 163113.840 | 200.0 | 440.0 | 900.0 | 2980.0 | 1270.0 | 600.0 | 1790.0 |
| 3 | Afghanistan | AFG | 1993 | 64873.6250 | 331279.280 | 2555748.20 | 2366581.00 | 705695.94 | 351943.25 | 862717.750 | ... | 157201.58 | 214913.9000 | 180597.550 | 210.0 | 430.0 | 870.0 | 2840.0 | 1240.0 | 570.0 | 1780.0 |
| 4 | Afghanistan | AFG | 1994 | 66452.0700 | 340745.120 | 2707120.00 | 2559192.20 | 725500.00 | 367981.28 | 963305.500 | ... | 162079.20 | 246066.0500 | 190782.720 | 200.0 | 420.0 | 820.0 | 2700.0 | 1200.0 | 540.0 | 1710.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6715 | Zimbabwe | ZWE | 2015 | 7196.8833 | 65671.530 | 653410.94 | 494563.78 | 288229.12 | 261661.61 | 63780.020 | ... | 254297.17 | 19442.2290 | 132720.830 | 580.0 | 850.0 | 1640.0 | 4430.0 | 1750.0 | 980.0 | 2190.0 |
| 6716 | Zimbabwe | ZWE | 2016 | 7424.6733 | 62675.110 | 641920.20 | 489271.53 | 293492.00 | 267324.88 | 58934.336 | ... | 257688.56 | 15954.1960 | 128623.750 | 600.0 | 880.0 | 1720.0 | 4650.0 | 1820.0 | 1020.0 | 2280.0 |
| 6717 | Zimbabwe | ZWE | 2017 | 7655.2163 | 60760.780 | 625386.40 | 472972.25 | 297975.16 | 271637.40 | 55018.605 | ... | 260584.25 | 14143.2295 | 122945.070 | 630.0 | 910.0 | 1800.0 | 4840.0 | 1880.0 | 1050.0 | 2360.0 |
| 6718 | Zimbabwe | ZWE | 2018 | 7862.5596 | 57715.004 | 609545.30 | 453153.88 | 303253.00 | 276985.75 | 53059.625 | ... | 263968.06 | 14170.8080 | 120112.450 | 660.0 | 940.0 | 1900.0 | 5080.0 | 1970.0 | 1100.0 | 2470.0 |
| 6719 | Zimbabwe | ZWE | 2019 | 8126.4062 | 56559.246 | 600330.00 | 442257.94 | 309965.70 | 283899.03 | 51181.336 | ... | 268873.30 | 13749.4900 | 119444.164 | 690.0 | 960.0 | 1970.0 | 5230.0 | 2010.0 | 1130.0 | 2530.0 |
5550 rows × 32 columns
merged_df
| Entity | Code | Year | low_physical_activity | non_exclusive_breastfeeding | air_pollution | child_wasting | high_systolic_blood_pressure | high_fasting_plasma_glucose | child_stunting | ... | smoking | vitamin_a_deficiency | ambient_particulate_matter_pollution | substance_use_disorders | skin_and_subcutaneous_diseases | musculoskeletal_disorders | neoplasms | neurological_disorders | sense_organ_diseases | mental_disorders | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 61720.0600 | 197049.340 | 1986290.40 | 1708694.40 | 663575.50 | 310177.80 | 670056.500 | ... | 146352.77 | 184149.1000 | 143037.550 | 190.0 | 420.0 | 940.0 | 3270.0 | 1280.0 | 650.0 | 1700.0 |
| 1 | Afghanistan | AFG | 1991 | 62191.6000 | 222485.780 | 2069430.00 | 1779057.60 | 670934.56 | 320839.94 | 687930.700 | ... | 148548.45 | 188899.7700 | 148112.800 | 190.0 | 420.0 | 920.0 | 3130.0 | 1270.0 | 630.0 | 1730.0 |
| 2 | Afghanistan | AFG | 1992 | 63325.2340 | 271585.200 | 2298508.20 | 2005481.50 | 685869.90 | 335451.56 | 748159.940 | ... | 152365.47 | 194698.0500 | 163113.840 | 200.0 | 440.0 | 900.0 | 2980.0 | 1270.0 | 600.0 | 1790.0 |
| 3 | Afghanistan | AFG | 1993 | 64873.6250 | 331279.280 | 2555748.20 | 2366581.00 | 705695.94 | 351943.25 | 862717.750 | ... | 157201.58 | 214913.9000 | 180597.550 | 210.0 | 430.0 | 870.0 | 2840.0 | 1240.0 | 570.0 | 1780.0 |
| 4 | Afghanistan | AFG | 1994 | 66452.0700 | 340745.120 | 2707120.00 | 2559192.20 | 725500.00 | 367981.28 | 963305.500 | ... | 162079.20 | 246066.0500 | 190782.720 | 200.0 | 420.0 | 820.0 | 2700.0 | 1200.0 | 540.0 | 1710.0 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 6715 | Zimbabwe | ZWE | 2015 | 7196.8833 | 65671.530 | 653410.94 | 494563.78 | 288229.12 | 261661.61 | 63780.020 | ... | 254297.17 | 19442.2290 | 132720.830 | 580.0 | 850.0 | 1640.0 | 4430.0 | 1750.0 | 980.0 | 2190.0 |
| 6716 | Zimbabwe | ZWE | 2016 | 7424.6733 | 62675.110 | 641920.20 | 489271.53 | 293492.00 | 267324.88 | 58934.336 | ... | 257688.56 | 15954.1960 | 128623.750 | 600.0 | 880.0 | 1720.0 | 4650.0 | 1820.0 | 1020.0 | 2280.0 |
| 6717 | Zimbabwe | ZWE | 2017 | 7655.2163 | 60760.780 | 625386.40 | 472972.25 | 297975.16 | 271637.40 | 55018.605 | ... | 260584.25 | 14143.2295 | 122945.070 | 630.0 | 910.0 | 1800.0 | 4840.0 | 1880.0 | 1050.0 | 2360.0 |
| 6718 | Zimbabwe | ZWE | 2018 | 7862.5596 | 57715.004 | 609545.30 | 453153.88 | 303253.00 | 276985.75 | 53059.625 | ... | 263968.06 | 14170.8080 | 120112.450 | 660.0 | 940.0 | 1900.0 | 5080.0 | 1970.0 | 1100.0 | 2470.0 |
| 6719 | Zimbabwe | ZWE | 2019 | 8126.4062 | 56559.246 | 600330.00 | 442257.94 | 309965.70 | 283899.03 | 51181.336 | ... | 268873.30 | 13749.4900 | 119444.164 | 690.0 | 960.0 | 1970.0 | 5230.0 | 2010.0 | 1130.0 | 2530.0 |
5550 rows × 32 columns
from sklearn.decomposition import PCA
from sklearn.preprocessing import StandardScaler
data_to_scale = merged_df.drop(columns=['Year', 'Entity','Code'])
# Scale the data
scaler = StandardScaler()
merged_df_scaled = scaler.fit_transform(data_to_scale)
# Perform PCA
pca = PCA()
pca_out = pca.fit_transform(merged_df_scaled)
pd.DataFrame({'Center': scaler.mean_
, 'Scale': scaler.scale_}
, index=data_to_scale.columns)
| Center | Scale | |
|---|---|---|
| low_physical_activity | 6.183756e+04 | 1.931739e+05 |
| non_exclusive_breastfeeding | 1.405859e+05 | 5.474627e+05 |
| air_pollution | 1.287456e+06 | 5.875304e+06 |
| child_wasting | 9.337180e+05 | 4.022350e+06 |
| high_systolic_blood_pressure | 1.013699e+06 | 3.894411e+06 |
| high_fasting_plasma_glucose | 6.231589e+05 | 2.323884e+06 |
| child_stunting | 2.162215e+05 | 1.033723e+06 |
| high_body_mass_index | 5.675718e+05 | 1.767211e+06 |
| secondhand_smoke | 2.069046e+05 | 9.895219e+05 |
| unsafe_sanitation | 4.167442e+05 | 2.100466e+06 |
| unsafe_water_source | 5.797473e+05 | 2.858931e+06 |
| diet_low_in_vegetables | 6.166332e+04 | 2.466170e+05 |
| diet_low_in_fruits | 1.318412e+05 | 6.228871e+05 |
| diet_high_in_sodium | 2.026395e+05 | 1.340764e+06 |
| drug_use | 1.346634e+05 | 5.809032e+05 |
| household_air_pollution | 7.858722e+05 | 3.737993e+06 |
| high_ldl_cholesterol | 4.403610e+05 | 1.593582e+06 |
| iron_deficiency | 1.611358e+05 | 9.209309e+05 |
| zinc_deficiency | 4.364368e+03 | 2.376293e+04 |
| smoking | 9.660387e+05 | 4.215962e+06 |
| vitamin_a_deficiency | 5.607599e+04 | 2.574250e+05 |
| ambient_particulate_matter_pollution | 4.874069e+05 | 2.429460e+06 |
| substance_use_disorders | 1.230150e+03 | 9.888776e+02 |
| skin_and_subcutaneous_diseases | 1.592032e+03 | 7.186604e+02 |
| musculoskeletal_disorders | 4.872905e+03 | 3.159057e+03 |
| neoplasms | 9.285721e+03 | 6.243723e+03 |
| neurological_disorders | 3.554023e+03 | 1.766960e+03 |
| sense_organ_diseases | 1.808128e+03 | 7.968354e+02 |
| mental_disorders | 4.833573e+03 | 2.402449e+03 |
print("Number of Principal Components:", pca.n_components_)
Number of Principal Components: 29
pca.explained_variance_
array([1.54999845e+01, 6.53276398e+00, 3.61909967e+00, 7.32560579e-01,
6.25741276e-01, 3.93517665e-01, 3.34616031e-01, 2.91070803e-01,
2.35397005e-01, 1.73817632e-01, 1.35456760e-01, 8.71869269e-02,
8.19874773e-02, 6.25877466e-02, 5.93653146e-02, 4.58567443e-02,
3.21872375e-02, 1.63381244e-02, 1.46354767e-02, 8.10614164e-03,
6.74050510e-03, 4.71839577e-03, 3.51122148e-03, 2.72828411e-03,
2.00595908e-03, 1.77739354e-03, 1.18237924e-03, 2.81399207e-04,
3.56051580e-06])
components_df = pd.DataFrame(pca.components_.T, index=data_to_scale.columns, columns=[f'PC{i+1}' for i in range(pca.n_components_)])
# Display the principal components DataFrame
components_df
| PC1 | PC2 | PC3 | PC4 | PC5 | PC6 | PC7 | PC8 | PC9 | PC10 | ... | PC20 | PC21 | PC22 | PC23 | PC24 | PC25 | PC26 | PC27 | PC28 | PC29 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| low_physical_activity | 0.212415 | 0.145885 | -0.118299 | 0.148716 | 0.176017 | 0.174498 | -0.259750 | -0.065327 | -0.020123 | 0.119085 | ... | -0.046319 | -0.171483 | 0.024133 | -0.176680 | -0.054725 | -0.033785 | 0.098384 | 0.040547 | 0.032751 | 0.001869 |
| non_exclusive_breastfeeding | 0.198540 | -0.157623 | 0.172540 | -0.070970 | -0.207033 | 0.131166 | -0.009283 | 0.053285 | 0.258481 | 0.198225 | ... | -0.094732 | 0.090772 | 0.210018 | 0.178297 | 0.126202 | -0.013032 | -0.091051 | -0.016574 | -0.057973 | 0.001013 |
| air_pollution | 0.249433 | -0.004436 | 0.010712 | -0.141177 | -0.026108 | -0.127332 | 0.169233 | 0.013836 | -0.062056 | 0.041759 | ... | -0.157459 | -0.168074 | 0.126273 | 0.066974 | -0.207898 | -0.173945 | -0.074675 | -0.028926 | -0.001878 | -0.794811 |
| child_wasting | 0.203092 | -0.165662 | 0.209707 | -0.032775 | -0.132708 | 0.065979 | -0.026060 | 0.028135 | 0.068973 | 0.031481 | ... | 0.005129 | 0.060770 | -0.038105 | 0.154870 | -0.270562 | 0.562551 | 0.530557 | 0.051779 | 0.027733 | -0.006829 |
| high_systolic_blood_pressure | 0.224138 | 0.133343 | -0.155266 | -0.008425 | -0.019690 | -0.005114 | -0.102263 | 0.013279 | -0.153901 | -0.086560 | ... | 0.187753 | -0.216277 | 0.147582 | -0.015253 | 0.113661 | -0.019206 | 0.178150 | -0.768055 | -0.081600 | -0.021155 |
| high_fasting_plasma_glucose | 0.227395 | 0.129165 | -0.125603 | 0.042586 | 0.135173 | 0.013292 | -0.094272 | -0.032811 | -0.021061 | -0.039968 | ... | -0.297484 | 0.246951 | 0.036509 | 0.090767 | 0.373547 | -0.250640 | 0.413664 | 0.108037 | 0.030920 | 0.007607 |
| child_stunting | 0.193075 | -0.173807 | 0.230871 | -0.024464 | -0.172057 | 0.095256 | -0.088729 | 0.035597 | 0.030853 | -0.008793 | ... | -0.193822 | -0.320524 | -0.420581 | -0.377370 | 0.467569 | 0.056407 | -0.180505 | -0.016996 | -0.060198 | 0.001799 |
| high_body_mass_index | 0.197867 | 0.165229 | -0.168983 | 0.247438 | 0.102253 | 0.236677 | -0.291714 | -0.025623 | 0.071945 | -0.025535 | ... | 0.128533 | 0.253420 | 0.228727 | -0.135125 | -0.187492 | 0.161851 | -0.445071 | 0.020895 | -0.054178 | 0.000004 |
| secondhand_smoke | 0.231702 | 0.081320 | -0.097590 | -0.213587 | -0.153461 | -0.098731 | 0.252120 | 0.036216 | 0.067969 | 0.104993 | ... | 0.379617 | 0.548524 | -0.220425 | -0.109400 | 0.148446 | -0.141263 | 0.062158 | -0.099244 | 0.114544 | 0.001678 |
| unsafe_sanitation | 0.200048 | -0.162379 | 0.230774 | 0.045155 | 0.011008 | 0.028360 | -0.085475 | 0.004197 | -0.061002 | -0.032213 | ... | 0.219704 | 0.200956 | -0.119401 | -0.120253 | -0.164605 | -0.339413 | 0.020996 | 0.112537 | -0.611853 | -0.018569 |
| unsafe_water_source | 0.202230 | -0.159259 | 0.225930 | 0.053575 | 0.044742 | 0.015358 | -0.081226 | 0.001301 | -0.052449 | -0.025387 | ... | 0.131142 | 0.051927 | -0.076663 | -0.061478 | -0.114225 | -0.198416 | -0.134605 | -0.038397 | 0.756960 | 0.021609 |
| diet_low_in_vegetables | 0.223073 | -0.003001 | 0.061173 | 0.199295 | 0.422392 | -0.189245 | 0.256951 | -0.056530 | 0.259988 | 0.044845 | ... | 0.133436 | -0.211880 | -0.292306 | 0.127354 | -0.146075 | -0.047563 | -0.046581 | 0.109079 | -0.028864 | -0.005125 |
| diet_low_in_fruits | 0.237983 | 0.091141 | -0.098777 | -0.049547 | 0.048964 | -0.155906 | 0.173768 | -0.007815 | -0.048272 | -0.025902 | ... | -0.426286 | 0.222439 | 0.056097 | -0.585453 | -0.192638 | 0.246134 | 0.017965 | 0.013297 | 0.010793 | 0.019107 |
| diet_high_in_sodium | 0.179092 | 0.152992 | -0.227137 | -0.321276 | -0.339342 | -0.039052 | 0.018138 | 0.073114 | -0.173166 | -0.059617 | ... | 0.383996 | -0.288583 | 0.151559 | -0.104752 | 0.078121 | 0.129316 | -0.046616 | 0.456657 | 0.017031 | 0.001244 |
| drug_use | 0.200096 | 0.138842 | -0.136886 | 0.133721 | -0.158013 | 0.306864 | 0.092627 | -0.056794 | 0.593269 | 0.081350 | ... | 0.120283 | -0.192674 | -0.048956 | -0.028293 | -0.031681 | -0.006945 | 0.132248 | -0.025997 | 0.016589 | -0.000058 |
| household_air_pollution | 0.236732 | -0.074069 | 0.099677 | -0.143585 | -0.089884 | -0.119524 | 0.258922 | 0.022198 | 0.097320 | 0.065549 | ... | -0.230638 | -0.173739 | 0.442174 | 0.045945 | -0.127933 | -0.290716 | -0.091767 | -0.015916 | -0.028708 | 0.506683 |
| high_ldl_cholesterol | 0.221231 | 0.128952 | -0.130133 | 0.169879 | 0.083725 | 0.036517 | -0.152345 | -0.017435 | -0.188052 | -0.095517 | ... | -0.052312 | -0.074930 | 0.034550 | 0.180503 | 0.130587 | -0.176917 | 0.183178 | 0.364620 | 0.080789 | 0.002359 |
| iron_deficiency | 0.222299 | -0.059561 | 0.128716 | 0.096306 | 0.389170 | -0.237284 | 0.204726 | -0.058113 | -0.055659 | 0.024745 | ... | 0.200753 | 0.019989 | 0.255890 | 0.108753 | 0.445703 | 0.399599 | -0.163575 | 0.016476 | -0.068514 | -0.002163 |
| zinc_deficiency | 0.179360 | -0.170202 | 0.241040 | 0.061467 | -0.017952 | 0.047043 | -0.173944 | -0.009361 | -0.202308 | -0.141467 | ... | -0.001664 | 0.020548 | 0.038282 | -0.012709 | -0.048315 | 0.050459 | 0.032140 | 0.012431 | -0.018547 | -0.001104 |
| smoking | 0.212048 | 0.153815 | -0.172452 | -0.102328 | -0.189664 | 0.051753 | -0.036699 | -0.024185 | -0.010133 | -0.031165 | ... | -0.332536 | 0.161181 | -0.297513 | 0.504077 | 0.036267 | 0.161721 | -0.377227 | -0.057790 | -0.054894 | 0.003514 |
| vitamin_a_deficiency | 0.178420 | -0.188677 | 0.228016 | 0.018896 | -0.129539 | 0.104102 | -0.191474 | -0.007663 | -0.137638 | -0.154974 | ... | 0.062788 | 0.066408 | 0.120335 | 0.147713 | -0.023945 | -0.007193 | -0.061749 | -0.043667 | 0.023674 | 0.001027 |
| ambient_particulate_matter_pollution | 0.230421 | 0.097758 | -0.120554 | -0.113656 | 0.075978 | -0.119461 | 0.002449 | 0.001261 | -0.297623 | -0.003130 | ... | 0.027707 | -0.164551 | -0.368588 | 0.120523 | -0.305179 | 0.024896 | -0.015329 | -0.088753 | -0.051579 | 0.331291 |
| substance_use_disorders | -0.009187 | 0.253340 | 0.152362 | 0.655318 | -0.371437 | 0.046692 | 0.428561 | 0.211380 | -0.213386 | -0.183106 | ... | -0.005056 | 0.006671 | -0.002515 | 0.009304 | 0.007876 | -0.004027 | 0.002399 | -0.005918 | 0.000710 | -0.000061 |
| skin_and_subcutaneous_diseases | -0.026409 | 0.286467 | 0.254842 | -0.308609 | 0.135932 | 0.182479 | 0.156102 | -0.223061 | 0.131008 | -0.713949 | ... | -0.015133 | 0.017389 | 0.010965 | -0.000632 | -0.002133 | -0.002841 | 0.001832 | -0.003471 | 0.000801 | 0.000058 |
| musculoskeletal_disorders | -0.020727 | 0.316480 | 0.269477 | -0.066615 | 0.016980 | 0.120133 | -0.088698 | -0.259265 | 0.090455 | 0.068288 | ... | 0.034514 | -0.014673 | 0.013300 | -0.027494 | -0.013693 | -0.009945 | -0.010569 | 0.006906 | 0.000857 | -0.000589 |
| neoplasms | -0.022955 | 0.290043 | 0.219899 | 0.113174 | -0.278239 | -0.534592 | -0.247566 | -0.448990 | 0.062620 | 0.185613 | ... | -0.004445 | 0.016030 | 0.008622 | -0.004350 | 0.004853 | -0.002982 | 0.018111 | -0.004956 | 0.000813 | 0.000295 |
| neurological_disorders | -0.034357 | 0.315475 | 0.279734 | -0.045605 | 0.000134 | -0.007555 | -0.045102 | -0.043465 | -0.083081 | 0.200422 | ... | 0.033316 | -0.041694 | -0.005462 | 0.011084 | -0.019953 | 0.013155 | -0.014659 | 0.009137 | -0.001405 | -0.000197 |
| sense_organ_diseases | 0.003371 | 0.302272 | 0.197044 | -0.087097 | 0.091879 | -0.287581 | -0.299995 | 0.767711 | 0.251729 | -0.093977 | ... | -0.015350 | 0.003857 | -0.001763 | 0.011869 | -0.010223 | 0.002640 | -0.000232 | -0.000998 | 0.000685 | -0.000351 |
| mental_disorders | -0.033414 | 0.283241 | 0.262180 | -0.190954 | 0.185897 | 0.423681 | 0.196368 | 0.127084 | -0.298983 | 0.466906 | ... | -0.029407 | 0.017335 | 0.000300 | 0.013593 | 0.021954 | 0.009086 | 0.009116 | -0.005454 | -0.001966 | 0.000614 |
29 rows × 29 columns
from adjustText import adjust_text
np.random.seed(0)
colors = np.random.rand(len(merged_df['Entity'].unique()))
# Create the plot
fig, ax1 = plt.subplots(figsize=(14, 10))
ax1.set_xlim(-3.5, 3.5)
ax1.set_ylim(-3.5, 3.5)
texts = []
# Plot country names for PC1 and PC2
for i, label in enumerate(merged_df['Entity'].unique().tolist()):
x = pca_out[i, 0]
y = pca_out[i, 1]
ax1.scatter(x, y, color=plt.cm.viridis(colors[i]), s=50, alpha=0.8, edgecolor='w', linewidth=0.5)
texts.append(ax1.text(x, y, label, fontsize=8))
adjust_text(texts, arrowprops=dict(arrowstyle='-', color='gray', lw=0.5))
ax1.set_xlabel("PC1", fontsize=14)
ax1.set_ylabel("PC2", fontsize=14)
ax1.set_title("PCA of Countries Based on Health Metrics", fontsize=16)
ax1.grid(True)
plt.savefig('PCA_of_Countries_Health_Metrics.png', dpi=300, bbox_inches='tight')
plt.show()
merged_df_scaled = pd.DataFrame(merged_df_scaled,columns= data_to_scale.columns)
merged_df_scaled
| low_physical_activity | non_exclusive_breastfeeding | air_pollution | child_wasting | high_systolic_blood_pressure | high_fasting_plasma_glucose | child_stunting | high_body_mass_index | secondhand_smoke | unsafe_sanitation | ... | smoking | vitamin_a_deficiency | ambient_particulate_matter_pollution | substance_use_disorders | skin_and_subcutaneous_diseases | musculoskeletal_disorders | neoplasms | neurological_disorders | sense_organ_diseases | mental_disorders | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | -0.000608 | 0.103137 | 0.118944 | 0.192668 | -0.089904 | -0.134680 | 0.439030 | -0.150726 | 0.047507 | -0.079876 | ... | -0.194424 | 0.497516 | -0.141747 | -1.051849 | -1.630857 | -1.244962 | -0.963483 | -1.286970 | -1.453409 | -1.304325 |
| 1 | 0.001833 | 0.149599 | 0.133095 | 0.210161 | -0.088015 | -0.130092 | 0.456321 | -0.150468 | 0.056777 | -0.060155 | ... | -0.193904 | 0.515971 | -0.139658 | -1.051849 | -1.630857 | -1.251293 | -0.985906 | -1.292629 | -1.478508 | -1.291837 |
| 2 | 0.007701 | 0.239284 | 0.172085 | 0.266452 | -0.084179 | -0.123805 | 0.514585 | -0.148278 | 0.085124 | -0.026027 | ... | -0.192998 | 0.538495 | -0.133484 | -1.041736 | -1.603028 | -1.257624 | -1.009930 | -1.292629 | -1.516157 | -1.266863 |
| 3 | 0.015717 | 0.348322 | 0.215868 | 0.356225 | -0.079089 | -0.116708 | 0.625406 | -0.145777 | 0.120734 | 0.030372 | ... | -0.191851 | 0.617026 | -0.126287 | -1.031624 | -1.616942 | -1.267120 | -1.032352 | -1.309607 | -1.553806 | -1.271025 |
| 4 | 0.023888 | 0.365613 | 0.241632 | 0.404111 | -0.074003 | -0.109807 | 0.722712 | -0.144219 | 0.143715 | 0.032545 | ... | -0.190694 | 0.738040 | -0.122095 | -1.041736 | -1.630857 | -1.282948 | -1.054775 | -1.332245 | -1.591455 | -1.300162 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5545 | -0.282857 | -0.136839 | -0.107917 | -0.109179 | -0.186285 | -0.155557 | -0.147468 | -0.212618 | -0.133415 | -0.120947 | ... | -0.168821 | -0.142308 | -0.145994 | -0.657462 | -1.032522 | -1.023377 | -0.777696 | -1.020976 | -1.039271 | -1.100366 |
| 5546 | -0.281678 | -0.142312 | -0.109873 | -0.110494 | -0.184934 | -0.153120 | -0.152156 | -0.208528 | -0.133217 | -0.122793 | ... | -0.168016 | -0.155858 | -0.147680 | -0.637237 | -0.990777 | -0.998053 | -0.742461 | -0.981360 | -0.989072 | -1.062904 |
| 5547 | -0.280485 | -0.145809 | -0.112687 | -0.114546 | -0.183782 | -0.151265 | -0.155944 | -0.204645 | -0.133910 | -0.124099 | ... | -0.167329 | -0.162893 | -0.150018 | -0.606900 | -0.949033 | -0.972728 | -0.712030 | -0.947403 | -0.951423 | -1.029605 |
| 5548 | -0.279411 | -0.151373 | -0.115383 | -0.119473 | -0.182427 | -0.148963 | -0.157839 | -0.200608 | -0.134524 | -0.127986 | ... | -0.166527 | -0.162786 | -0.151184 | -0.576562 | -0.907289 | -0.941073 | -0.673592 | -0.896468 | -0.888675 | -0.983818 |
| 5549 | -0.278046 | -0.153484 | -0.116952 | -0.122182 | -0.180703 | -0.145988 | -0.159656 | -0.195987 | -0.134189 | -0.130251 | ... | -0.165363 | -0.164423 | -0.151459 | -0.546225 | -0.879459 | -0.918915 | -0.649568 | -0.873831 | -0.851026 | -0.958844 |
5550 rows × 29 columns
sns.set(style='whitegrid')
fig, ax = plt.subplots(1, 2, figsize=(16, 7), dpi=120)
sns.histplot(merged_df_scaled[merged_df_scaled.columns[0]], kde=True, color='skyblue', ax=ax[0], binwidth=0.5)
ax[0].set_title('Distribution of First Principal Component', fontsize=14)
ax[0].set_xlabel('Component Value', fontsize=12)
ax[0].set_ylabel('Frequency', fontsize=12)
ax[0].set_ylim(0, 100) # Adjusted y-axis range
sns.histplot(merged_df_scaled[merged_df_scaled.columns[1]], kde=True, color='salmon', ax=ax[1], binwidth=0.5)
ax[1].set_title('Distribution of Second Principal Component', fontsize=14)
ax[1].set_xlabel('Component Value', fontsize=12)
ax[1].set_ylabel('Frequency', fontsize=12)
ax[1].set_ylim(0, 100) # Adjusted y-axis range
# Adding a vertical line for the mean
mean_val_0 = merged_df_scaled[merged_df_scaled.columns[0]].mean()
mean_val_1 = merged_df_scaled[merged_df_scaled.columns[1]].mean()
ax[0].axvline(mean_val_0, color='red', linestyle='dashed', linewidth=1)
ax[1].axvline(mean_val_1, color='red', linestyle='dashed', linewidth=1)
# Add text annotation for mean
ax[0].text(mean_val_0 + 0.5, 95, f'Mean: {mean_val_0:.2f}', color = 'red', fontsize=10)
ax[1].text(mean_val_1 + 0.5, 95, f'Mean: {mean_val_1:.2f}', color = 'red', fontsize=10)
plt.tight_layout()
plt.show()
# Plotting the explained variance by PCA
plt.figure(figsize=(8, 6))
plt.bar(range(len(pca.explained_variance_ratio_)), pca.explained_variance_ratio_)
plt.xlabel('Principal Components')
plt.ylabel('Variance Explained')
plt.title('Explained Variance by PCA Components')
plt.show()
pca.explained_variance_ratio_
array([5.34385920e-01, 2.25227135e-01, 1.24774054e-01, 2.52561582e-02,
2.15733976e-02, 1.35671297e-02, 1.15364048e-02, 1.00351158e-02,
8.11567557e-03, 5.99263152e-03, 4.67008115e-03, 3.00590405e-03,
2.82664499e-03, 2.15780929e-03, 2.04671097e-03, 1.58098213e-03,
1.10970476e-03, 5.63282090e-04, 5.04580679e-04, 2.79471761e-04,
2.32389331e-04, 1.62673986e-04, 1.21054787e-04, 9.40618112e-05,
6.91585394e-05, 6.12783892e-05, 4.07643518e-05, 9.70167258e-06,
1.22754285e-07])
# Plot of proportion of variance explained
fig, ax = plt.subplots(1, 2, figsize=(15, 5))
# Plot of proportion of variance explained
ax[0].plot(range(1, 30), pca.explained_variance_ratio_, marker='o')
ax[0].set_xlabel('Principal Component')
ax[0].set_ylabel('Proportion of Variance Explained')
ax[0].set_ylim(0, 1.03)
ax[0].set_xticks(range(1, 30))
# Plot of cumulative proportion of variance explained
ax[1].plot(range(1, 30), np.cumsum(pca.explained_variance_ratio_), marker='o')
ax[1].set_xlabel('Principal Component')
ax[1].set_ylabel('Cumulative Proportion of Variance Explained')
ax[1].set_ylim(0, 1.03)
ax[1].set_xticks(range(1, 30))
plt.tight_layout()
plt.show()
numeric_columns = merged_df_scaled.select_dtypes(include=['float64', 'int64']).columns
merged_numeric = merged_df_scaled[numeric_columns]
# Standardize the numeric data
scaler = StandardScaler()
merged_numeric_scaled = scaler.fit_transform(merged_numeric)
# Perform PCA to reduce to 4 components
pca = PCA(n_components=4)
pca_out = pca.fit_transform(merged_numeric_scaled)
# Print PCA components' loadings
loadings = pd.DataFrame(pca.components_.T, columns=[f'PC{i+1}' for i in range(4)], index=numeric_columns)
print("PCA Loadings:")
print(loadings)
PCA Loadings:
PC1 PC2 PC3 PC4
low_physical_activity 0.212415 0.145885 -0.118299 0.148719
non_exclusive_breastfeeding 0.198540 -0.157623 0.172540 -0.070970
air_pollution 0.249433 -0.004436 0.010712 -0.141176
child_wasting 0.203092 -0.165662 0.209707 -0.032775
high_systolic_blood_pressure 0.224138 0.133343 -0.155266 -0.008427
high_fasting_plasma_glucose 0.227395 0.129165 -0.125603 0.042588
child_stunting 0.193075 -0.173807 0.230871 -0.024464
high_body_mass_index 0.197867 0.165229 -0.168983 0.247438
secondhand_smoke 0.231702 0.081320 -0.097590 -0.213588
unsafe_sanitation 0.200048 -0.162379 0.230774 0.045156
unsafe_water_source 0.202230 -0.159259 0.225930 0.053575
diet_low_in_vegetables 0.223073 -0.003001 0.061173 0.199293
diet_low_in_fruits 0.237983 0.091141 -0.098777 -0.049549
diet_high_in_sodium 0.179092 0.152992 -0.227137 -0.321276
drug_use 0.200096 0.138842 -0.136886 0.133722
household_air_pollution 0.236732 -0.074069 0.099677 -0.143585
high_ldl_cholesterol 0.221231 0.128952 -0.130133 0.169875
iron_deficiency 0.222299 -0.059561 0.128716 0.096307
zinc_deficiency 0.179360 -0.170202 0.241040 0.061468
smoking 0.212048 0.153815 -0.172452 -0.102329
vitamin_a_deficiency 0.178420 -0.188677 0.228016 0.018895
ambient_particulate_matter_pollution 0.230421 0.097758 -0.120554 -0.113654
substance_use_disorders -0.009187 0.253340 0.152362 0.655319
skin_and_subcutaneous_diseases -0.026409 0.286467 0.254842 -0.308608
musculoskeletal_disorders -0.020727 0.316480 0.269477 -0.066616
neoplasms -0.022955 0.290043 0.219899 0.113176
neurological_disorders -0.034357 0.315475 0.279734 -0.045609
sense_organ_diseases 0.003371 0.302272 0.197044 -0.087096
mental_disorders -0.033414 0.283241 0.262180 -0.190953
merged_df.columns
Index(['Entity', 'Code_x', 'Year', 'low_physical_activity',
'non_exclusive_breastfeeding', 'air_pollution', 'child_wasting',
'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
'child_stunting', 'high_body_mass_index', 'secondhand_smoke',
'unsafe_sanitation', 'unsafe_water_source', 'diet_low_in_vegetables',
'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency',
'zinc_deficiency', 'smoking', 'vitamin_a_deficiency',
'ambient_particulate_matter_pollution', 'Code_y',
'substance_use_disorders', 'skin_and_subcutaneous_diseases',
'musculoskeletal_disorders', 'neoplasms', 'neurological_disorders',
'sense_organ_diseases', 'mental_disorders'],
dtype='object')
features = [
'low_physical_activity', 'non_exclusive_breastfeeding', 'air_pollution',
'child_wasting', 'high_systolic_blood_pressure', 'high_fasting_plasma_glucose',
'high_body_mass_index', 'secondhand_smoke', 'unsafe_sanitation', 'unsafe_water_source',
'diet_low_in_vegetables', 'diet_low_in_fruits', 'diet_high_in_sodium', 'drug_use',
'household_air_pollution', 'high_ldl_cholesterol', 'iron_deficiency', 'zinc_deficiency',
'smoking', 'vitamin_a_deficiency', 'ambient_particulate_matter_pollution',
'substance_use_disorders', 'skin_and_subcutaneous_diseases', 'musculoskeletal_disorders',
'neoplasms', 'neurological_disorders', 'sense_organ_diseases', 'child_stunting']
target = 'mental_disorders'
# Normalize the dataset
scaler = MinMaxScaler()
merged_df[features] = scaler.fit_transform(merged_df[features])
merged_df[target] = scaler.fit_transform(merged_df[[target]])
merged_df
| Entity | Code_x | Year | low_physical_activity | non_exclusive_breastfeeding | air_pollution | child_wasting | high_systolic_blood_pressure | high_fasting_plasma_glucose | child_stunting | ... | vitamin_a_deficiency | ambient_particulate_matter_pollution | Code_y | substance_use_disorders | skin_and_subcutaneous_diseases | musculoskeletal_disorders | neoplasms | neurological_disorders | sense_organ_diseases | mental_disorders | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | Afghanistan | AFG | 1990 | 0.024609 | 0.024400 | 0.028845 | 0.022883 | 0.012181 | 0.009364 | 0.035928 | ... | 0.038347 | 0.004351 | AFG | 0.022659 | 0.087404 | 0.058946 | 0.092971 | 0.135356 | 0.139759 | 0.109306 |
| 1 | Afghanistan | AFG | 1991 | 0.024797 | 0.027550 | 0.030052 | 0.023825 | 0.012316 | 0.009686 | 0.036887 | ... | 0.039336 | 0.004506 | AFG | 0.022659 | 0.087404 | 0.057559 | 0.088498 | 0.134189 | 0.134940 | 0.111521 |
| 2 | Afghanistan | AFG | 1992 | 0.025249 | 0.033629 | 0.033379 | 0.026857 | 0.012591 | 0.010128 | 0.040116 | ... | 0.040544 | 0.004962 | AFG | 0.024169 | 0.092545 | 0.056172 | 0.083706 | 0.134189 | 0.127711 | 0.115953 |
| 3 | Afghanistan | AFG | 1993 | 0.025867 | 0.041021 | 0.037115 | 0.031693 | 0.012955 | 0.010626 | 0.046259 | ... | 0.044754 | 0.005494 | AFG | 0.025680 | 0.089974 | 0.054092 | 0.079233 | 0.130688 | 0.120482 | 0.115214 |
| 4 | Afghanistan | AFG | 1994 | 0.026496 | 0.042193 | 0.039313 | 0.034272 | 0.013319 | 0.011111 | 0.051652 | ... | 0.051241 | 0.005804 | AFG | 0.024169 | 0.087404 | 0.050624 | 0.074760 | 0.126021 | 0.113253 | 0.110044 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 5545 | Zimbabwe | ZWE | 2015 | 0.002861 | 0.008132 | 0.009488 | 0.006623 | 0.005287 | 0.007898 | 0.003420 | ... | 0.004049 | 0.004037 | ZWE | 0.081571 | 0.197943 | 0.107490 | 0.130032 | 0.190198 | 0.219277 | 0.145495 |
| 5546 | Zimbabwe | ZWE | 2016 | 0.002952 | 0.007761 | 0.009321 | 0.006552 | 0.005383 | 0.008070 | 0.003160 | ... | 0.003322 | 0.003913 | ZWE | 0.084592 | 0.205656 | 0.113037 | 0.137061 | 0.198366 | 0.228916 | 0.152142 |
| 5547 | Zimbabwe | ZWE | 2017 | 0.003044 | 0.007524 | 0.009081 | 0.006334 | 0.005466 | 0.008200 | 0.002950 | ... | 0.002945 | 0.003740 | ZWE | 0.089124 | 0.213368 | 0.118585 | 0.143131 | 0.205368 | 0.236145 | 0.158050 |
| 5548 | Zimbabwe | ZWE | 2018 | 0.003127 | 0.007147 | 0.008851 | 0.006068 | 0.005563 | 0.008361 | 0.002845 | ... | 0.002951 | 0.003654 | ZWE | 0.093656 | 0.221080 | 0.125520 | 0.150799 | 0.215869 | 0.248193 | 0.166174 |
| 5549 | Zimbabwe | ZWE | 2019 | 0.003232 | 0.007003 | 0.008717 | 0.005923 | 0.005686 | 0.008570 | 0.002744 | ... | 0.002863 | 0.003633 | ZWE | 0.098187 | 0.226221 | 0.130374 | 0.155591 | 0.220537 | 0.255422 | 0.170606 |
5550 rows × 33 columns
pca = PCA(n_components=4) # Select number of components
merged_df_pca = pca.fit_transform(merged_dfdf.drop(columns=['Entity','Year','Code_x','Code_y']))
merged_df_pca
array([[-0.64612718, -0.0248205 , -0.04948753, 0.03328347],
[-0.65036896, -0.02247287, -0.04492744, 0.03350559],
[-0.65257246, -0.01691882, -0.03583051, 0.03512559],
...,
[-0.45180058, -0.06040674, -0.05563566, 0.00464654],
[-0.42997334, -0.05967361, -0.0546987 , 0.00425658],
[-0.41675952, -0.05889445, -0.0550508 , 0.00457749]])
column_names = [f'PC{i}' for i in range(1, merged_df_pca.shape[1] + 1)]
# Create a new DataFrame
pca_df = pd.DataFrame(data=merged_df_pca, columns=column_names)
pca_df
| PC1 | PC2 | PC3 | PC4 | |
|---|---|---|---|---|
| 0 | -0.646127 | -0.024821 | -0.049488 | 0.033283 |
| 1 | -0.650369 | -0.022473 | -0.044927 | 0.033506 |
| 2 | -0.652572 | -0.016919 | -0.035831 | 0.035126 |
| 3 | -0.661566 | -0.008751 | -0.028581 | 0.042535 |
| 4 | -0.673319 | -0.004180 | -0.025247 | 0.046511 |
| ... | ... | ... | ... | ... |
| 5545 | -0.486504 | -0.061376 | -0.057477 | 0.005150 |
| 5546 | -0.468311 | -0.060805 | -0.056375 | 0.004515 |
| 5547 | -0.451801 | -0.060407 | -0.055636 | 0.004647 |
| 5548 | -0.429973 | -0.059674 | -0.054699 | 0.004257 |
| 5549 | -0.416760 | -0.058894 | -0.055051 | 0.004577 |
5550 rows × 4 columns
X = pca_df.values # Features (PC components)
y = merged_df['mental_disorders'].values.ravel() # Target variable
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
param_grid_rbf = {'C': [0.01, 0.1, 1, 10,50,100], 'gamma': [0.01, 0.1, 1, 10,50,100]}
# Initialize GridSearchCV with RBF kernel
grid_search_rbf = GridSearchCV(SVR(kernel='rbf', verbose=True), param_grid_rbf, scoring='neg_mean_squared_error', cv=10, n_jobs=-1)
# Fit the GridSearchCV on the training data
grid_search_rbf.fit(X_train, y_train)
# Get the best parameters and the best estimator
best_params_rbf = grid_search_rbf.best_params_
best_estimator_rbf = grid_search_rbf.best_estimator_
print("Best parameters for RBF kernel:", best_params_rbf)
# Evaluate the best RBF model on the test set
accuracy_rbf = best_estimator_rbf.score(X_test, y_test)
print(f"Accuracy with the best RBF model: {accuracy_rbf * 100:.2f}%")
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1103
obj = -1.463646, rho = -0.324476
nSV = 1438, nBSV = 1438
....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1095
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.450658, rho = -0.325477
nSV = 1438, nBSV = 1438
Warning: using -h 0 may be faster
*obj = -1.462666, rho = -0.325532
nSV = 1450, nBSV = 1450
optimization finished, #iter = 1110
obj = -1.468013, rho = -0.325201
nSV = 1446, nBSV = 1446
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.481792, rho = -0.325563
nSV = 1454, nBSV = 1454
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1120
.obj = -1.487925, rho = -0.325715
nSV = 1452, nBSV = 1452
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.472229, rho = -0.326717
nSV = 1454, nBSV = 1454
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1108
obj = -1.462636, rho = -0.324253
nSV = 1444, nBSV = 1444
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1108
obj = -1.474819, rho = -0.328993
nSV = 1466, nBSV = 1466
.*
optimization finished, #iter = 510
obj = -0.510583, rho = -0.337970
nSV = 627, nBSV = 624
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1111
obj = -1.468520, rho = -0.325974
nSV = 1456, nBSV = 1456
*
optimization finished, #iter = 497
obj = -0.498536, rho = -0.337768
nSV = 606, nBSV = 606
*
optimization finished, #iter = 510
obj = -0.508828, rho = -0.336354
nSV = 621, nBSV = 619
*
optimization finished, #iter = 510
obj = -0.501838, rho = -0.336550
nSV = 617, nBSV = 614
*
optimization finished, #iter = 493
obj = -0.498506, rho = -0.339346
nSV = 614, nBSV = 614
*
optimization finished, #iter = 500
obj = -0.488937, rho = -0.341242
nSV = 604, nBSV = 604
*
optimization finished, #iter = 513
obj = -0.496468, rho = -0.335430
nSV = 604, nBSV = 601
*
optimization finished, #iter = 501
obj = -0.499688, rho = -0.338540
nSV = 612, nBSV = 612
*
optimization finished, #iter = 253
obj = -0.274006, rho = -0.356846
nSV = 435, nBSV = 429
*
optimization finished, #iter = 499
obj = -0.494124, rho = -0.336779
nSV = 608, nBSV = 608
*
optimization finished, #iter = 247
obj = -0.264562, rho = -0.352545
nSV = 425, nBSV = 419
*
optimization finished, #iter = 251
obj = -0.265863, rho = -0.348124
nSV = 421, nBSV = 417
*
optimization finished, #iter = 502
obj = -0.497957, rho = -0.335812
nSV = 613, nBSV = 611
*
optimization finished, #iter = 251
obj = -0.266630, rho = -0.351611
nSV = 428, nBSV = 422
*
optimization finished, #iter = 252
obj = -0.261688, rho = -0.354702
nSV = 427, nBSV = 419
*
optimization finished, #iter = 262
obj = -0.266561, rho = -0.351533
nSV = 433, nBSV = 428
*
optimization finished, #iter = 262
obj = -0.271855, rho = -0.352938
nSV = 443, nBSV = 436
*
optimization finished, #iter = 249
obj = -0.262970, rho = -0.353176
nSV = 428, nBSV = 424
*
optimization finished, #iter = 249
obj = -0.267444, rho = -0.351334
nSV = 433, nBSV = 426
*
optimization finished, #iter = 244
obj = -0.266746, rho = -0.354738
nSV = 427, nBSV = 421
*
optimization finished, #iter = 251
obj = -0.269414, rho = -0.359860
nSV = 414, nBSV = 394
*
optimization finished, #iter = 256
obj = -0.275997, rho = -0.357632
nSV = 421, nBSV = 397
*
optimization finished, #iter = 261
obj = -0.273073, rho = -0.357131
nSV = 416, nBSV = 394
*
optimization finished, #iter = 273
obj = -0.277697, rho = -0.362180
nSV = 430, nBSV = 406
*
optimization finished, #iter = 259
obj = -0.277492, rho = -0.359869
nSV = 428, nBSV = 404
*
optimization finished, #iter = 261
obj = -0.273198, rho = -0.362624
nSV = 425, nBSV = 401
*
optimization finished, #iter = 263
obj = -0.267990, rho = -0.361797
nSV = 422, nBSV = 395
*
optimization finished, #iter = 254
obj = -0.272911, rho = -0.358676
nSV = 420, nBSV = 398
*
optimization finished, #iter = 259
obj = -0.270906, rho = -0.360268
nSV = 415, nBSV = 396
*
optimization finished, #iter = 261
obj = -0.271329, rho = -0.358869
nSV = 424, nBSV = 401
*
optimization finished, #iter = 332
obj = -0.426746, rho = -0.373948
nSV = 546, nBSV = 486
*
optimization finished, #iter = 314
obj = -0.416846, rho = -0.372208
nSV = 532, nBSV = 479
*
optimization finished, #iter = 318
obj = -0.426335, rho = -0.372414
nSV = 529, nBSV = 474
*
optimization finished, #iter = 306
obj = -0.419990, rho = -0.373260
nSV = 527, nBSV = 473
*
optimization finished, #iter = 313
obj = -0.423256, rho = -0.371785
nSV = 535, nBSV = 480
*
optimization finished, #iter = 317
obj = -0.428845, rho = -0.373725
nSV = 539, nBSV = 487
*
optimization finished, #iter = 311
obj = -0.419347, rho = -0.376486
nSV = 527, nBSV = 482
*
optimization finished, #iter = 312
obj = -0.423435, rho = -0.373441
nSV = 530, nBSV = 479
*
optimization finished, #iter = 319
obj = -0.420396, rho = -0.372488
nSV = 533, nBSV = 481
*
optimization finished, #iter = 312
obj = -0.415093, rho = -0.372888
nSV = 526, nBSV = 472
*
optimization finished, #iter = 373
obj = -0.543907, rho = -0.379507
nSV = 647, nBSV = 580
*
optimization finished, #iter = 387
obj = -0.552731, rho = -0.378834
nSV = 646, nBSV = 573
*
optimization finished, #iter = 388
obj = -0.557790, rho = -0.381246
nSV = 658, nBSV = 596
*
optimization finished, #iter = 386
obj = -0.542286, rho = -0.379888
nSV = 638, nBSV = 570
*
optimization finished, #iter = 404
obj = -0.558065, rho = -0.380964
nSV = 659, nBSV = 587
*
optimization finished, #iter = 389
obj = -0.550958, rho = -0.380226
nSV = 662, nBSV = 585
*
optimization finished, #iter = 377
obj = -0.551317, rho = -0.381702
nSV = 646, nBSV = 581
*
optimization finished, #iter = 392
obj = -0.545092, rho = -0.380999
nSV = 645, nBSV = 570
*
optimization finished, #iter = 387
obj = -0.546694, rho = -0.379935
nSV = 647, nBSV = 577
*
optimization finished, #iter = 267
obj = -2.317302, rho = -0.442589
nSV = 425, nBSV = 419
*
optimization finished, #iter = 258
obj = -2.236913, rho = -0.436204
nSV = 406, nBSV = 401
*
optimization finished, #iter = 510
obj = -4.852408, rho = -0.343128
nSV = 601, nBSV = 599
*
optimization finished, #iter = 522
obj = -4.835956, rho = -0.338642
nSV = 588, nBSV = 588
*
optimization finished, #iter = 522
obj = -4.758874, rho = -0.336792
nSV = 590, nBSV = 588
*
optimization finished, #iter = 525
obj = -4.974817, rho = -0.342755
nSV = 614, nBSV = 611
*
optimization finished, #iter = 262
obj = -2.236312, rho = -0.441505
nSV = 413, nBSV = 409
*
optimization finished, #iter = 516
obj = -4.810248, rho = -0.339991
nSV = 594, nBSV = 594
*
optimization finished, #iter = 388
obj = -0.544754, rho = -0.383938
nSV = 652, nBSV = 575
*
optimization finished, #iter = 269
obj = -2.199047, rho = -0.443960
nSV = 408, nBSV = 403
*
optimization finished, #iter = 265
obj = -2.259902, rho = -0.434272
nSV = 418, nBSV = 415
*
optimization finished, #iter = 253
obj = -2.242809, rho = -0.434212
nSV = 412, nBSV = 408
*
optimization finished, #iter = 269
obj = -2.232987, rho = -0.439116
nSV = 418, nBSV = 410
*
optimization finished, #iter = 261
obj = -2.308076, rho = -0.440123
nSV = 428, nBSV = 419
*
optimization finished, #iter = 503
obj = -4.852050, rho = -0.343142
nSV = 596, nBSV = 594
*
optimization finished, #iter = 519
obj = -4.959608, rho = -0.339967
nSV = 609, nBSV = 607
*
optimization finished, #iter = 276
obj = -1.474786, rho = -0.396544
nSV = 355, nBSV = 340
*
optimization finished, #iter = 260
obj = -2.197627, rho = -0.434588
nSV = 410, nBSV = 402
*
optimization finished, #iter = 290
obj = -1.513555, rho = -0.400913
nSV = 371, nBSV = 353
*
optimization finished, #iter = 518
obj = -4.849325, rho = -0.339949
nSV = 602, nBSV = 602
*
optimization finished, #iter = 511
obj = -4.865294, rho = -0.342652
nSV = 600, nBSV = 600
*
optimization finished, #iter = 268
obj = -1.437815, rho = -0.398990
nSV = 351, nBSV = 334
*
optimization finished, #iter = 524
obj = -4.891607, rho = -0.338557
nSV = 605, nBSV = 603
*
optimization finished, #iter = 279
obj = -1.458444, rho = -0.398487
nSV = 351, nBSV = 334
*
optimization finished, #iter = 351
obj = -1.077935, rho = -0.360476
nSV = 290, nBSV = 245
*
optimization finished, #iter = 269
obj = -1.460408, rho = -0.397505
nSV = 358, nBSV = 342
*
optimization finished, #iter = 280
obj = -1.427558, rho = -0.399358
nSV = 345, nBSV = 326
*
optimization finished, #iter = 264
obj = -2.240030, rho = -0.443600
nSV = 420, nBSV = 413
*
optimization finished, #iter = 244
obj = -1.464350, rho = -0.396342
nSV = 349, nBSV = 334
*
optimization finished, #iter = 330
obj = -1.063557, rho = -0.359485
nSV = 290, nBSV = 244
*
optimization finished, #iter = 264
obj = -1.510425, rho = -0.397814
nSV = 361, nBSV = 346
*
optimization finished, #iter = 411
obj = -1.081737, rho = -0.361101
nSV = 284, nBSV = 240
*
optimization finished, #iter = 354
obj = -1.089235, rho = -0.360696
nSV = 292, nBSV = 255
*
optimization finished, #iter = 340
obj = -1.071645, rho = -0.360787
nSV = 281, nBSV = 241
*
optimization finished, #iter = 278
obj = -1.443044, rho = -0.401066
nSV = 352, nBSV = 335
*
optimization finished, #iter = 340
obj = -1.093079, rho = -0.359516
nSV = 292, nBSV = 250
*
optimization finished, #iter = 276
obj = -1.459918, rho = -0.396746
nSV = 354, nBSV = 336
*
optimization finished, #iter = 497
obj = -0.983314, rho = -0.367503
nSV = 296, nBSV = 174
*
optimization finished, #iter = 482
obj = -0.965048, rho = -0.366625
nSV = 293, nBSV = 171
*
optimization finished, #iter = 341
obj = -1.113406, rho = -0.359515
nSV = 293, nBSV = 249
*
optimization finished, #iter = 343
obj = -1.067224, rho = -0.359857
nSV = 285, nBSV = 242
*
optimization finished, #iter = 342
obj = -1.063808, rho = -0.359925
nSV = 285, nBSV = 239
*
optimization finished, #iter = 381
obj = -1.067656, rho = -0.358636
nSV = 290, nBSV = 246
*
optimization finished, #iter = 495
obj = -0.995497, rho = -0.367775
nSV = 298, nBSV = 171
*
optimization finished, #iter = 649
obj = -1.140802, rho = -0.371631
nSV = 339, nBSV = 127
*
optimization finished, #iter = 316
obj = -23.293925, rho = -0.648578
nSV = 427, nBSV = 421
*
optimization finished, #iter = 541
obj = -0.986870, rho = -0.367572
nSV = 305, nBSV = 175
*
optimization finished, #iter = 716
obj = -1.138343, rho = -0.372666
nSV = 345, nBSV = 132
*
optimization finished, #iter = 475
obj = -1.008818, rho = -0.367500
nSV = 300, nBSV = 179
*
optimization finished, #iter = 295
obj = -22.093758, rho = -0.655216
nSV = 414, nBSV = 408
*
optimization finished, #iter = 312
obj = -22.109594, rho = -0.652791
nSV = 410, nBSV = 404
*
optimization finished, #iter = 377
obj = -15.263912, rho = -0.659622
nSV = 370, nBSV = 358
*
optimization finished, #iter = 478
obj = -0.984249, rho = -0.365942
nSV = 297, nBSV = 174
*
optimization finished, #iter = 362
obj = -15.060874, rho = -0.671414
nSV = 376, nBSV = 367
*
optimization finished, #iter = 489
obj = -0.993953, rho = -0.367436
nSV = 299, nBSV = 173
*
optimization finished, #iter = 531
obj = -0.984332, rho = -0.367767
*nSV = 297, nBSV = 167
optimization finished, #iter = 304
obj = -22.745007, rho = -0.647209
nSV = 422, nBSV = 416
*
optimization finished, #iter = 718
obj = -1.131764, rho = -0.371926
nSV = 337, nBSV = 126
*
optimization finished, #iter = 637
obj = -1.139698, rho = -0.370886
nSV = 327, nBSV = 127
*
optimization finished, #iter = 313
obj = -22.494278, rho = -0.645674
nSV = 420, nBSV = 412
*
optimization finished, #iter = 312
obj = -23.211450, rho = -0.659395
nSV = 427, nBSV = 421
*
optimization finished, #iter = 372
obj = -15.156937, rho = -0.657676
nSV = 376, nBSV = 363
*
optimization finished, #iter = 508
obj = -0.981855, rho = -0.366995
nSV = 296, nBSV = 171
*
optimization finished, #iter = 314
obj = -22.482080, rho = -0.648772
nSV = 410, nBSV = 404
*
optimization finished, #iter = 535
obj = -0.991780, rho = -0.365616
nSV = 301, nBSV = 170
*
optimization finished, #iter = 671
obj = -1.124870, rho = -0.372196
nSV = 333, nBSV = 127
*
optimization finished, #iter = 385
obj = -15.075611, rho = -0.664930
nSV = 374, nBSV = 363
*
optimization finished, #iter = 666
obj = -1.112458, rho = -0.371123
nSV = 333, nBSV = 129
*
optimization finished, #iter = 294
obj = -22.463901, rho = -0.654999
nSV = 414, nBSV = 408
*
optimization finished, #iter = 721
obj = -1.148453, rho = -0.372086
nSV = 342, nBSV = 126
**
optimization finished, #iter = 349
optimization finished, #iter = 403
obj = -15.748325, rho = -0.669452
obj = -14.972779, rho = -0.688794
nSV = 388, nBSV = 378
nSV = 384, nBSV = 369
*
optimization finished, #iter = 289
obj = -22.494887, rho = -0.609187
nSV = 416, nBSV = 409
**
optimization finished, #iter = 359
obj = -14.797170, rho = -0.672167
nSV = 375, nBSV = 361
optimization finished, #iter = 637
obj = -1.124583, rho = -0.372161
nSV = 340, nBSV = 135
*
optimization finished, #iter = 708
obj = -10.893092, rho = -0.411110
nSV = 310, nBSV = 289
*
optimization finished, #iter = 655
obj = -1.149857, rho = -0.372222
nSV = 339, nBSV = 133
*
optimization finished, #iter = 313
obj = -22.432615, rho = -0.637931
nSV = 420, nBSV = 412
*
optimization finished, #iter = 393
obj = -15.339299, rho = -0.642415
nSV = 375, nBSV = 362
*
optimization finished, #iter = 540
obj = -10.527128, rho = -0.414038
nSV = 297, nBSV = 273
**
optimization finished, #iter = 377
obj = -15.774116, rho = -0.665706
nSV = 383, nBSV = 371
optimization finished, #iter = 685
obj = -1.122288, rho = -0.371004
nSV = 336, nBSV = 123
*
optimization finished, #iter = 592
obj = -10.423611, rho = -0.414606
nSV = 285, nBSV = 268
*
optimization finished, #iter = 390
obj = -15.045847, rho = -0.688430
nSV = 384, nBSV = 372
*
optimization finished, #iter = 699
obj = -10.788471, rho = -0.420705
nSV = 305, nBSV = 283
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1723
obj = -6.125348, rho = -0.368290
nSV = 260, nBSV = 189
*.*
optimization finished, #iter = 667
obj = -11.091288, rho = -0.413109
nSV = 307, nBSV = 285
.*
optimization finished, #iter = 2090
obj = -2.725640, rho = -0.369352
nSV = 255, nBSV = 85
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1345
obj = -5.957202, rho = -0.370751
nSV = 247, nBSV = 186
*.*.*
optimization finished, #iter = 1812
obj = -2.837166, rho = -0.370277
nSV = 249, nBSV = 85
*
optimization finished, #iter = 605
obj = -10.450417, rho = -0.409345
nSV = 293, nBSV = 273
*.*
optimization finished, #iter = 1651
obj = -2.011133, rho = -0.373328
nSV = 289, nBSV = 32
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1788
obj = -2.958236, rho = -0.371088
nSV = 259, nBSV = 94
*
optimization finished, #iter = 666
obj = -10.791890, rho = -0.410281
nSV = 294, nBSV = 270
.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1997
obj = -1.973988, rho = -0.375253
nSV = 294, nBSV = 39
*
optimization finished, #iter = 573
obj = -10.722619, rho = -0.411062
nSV = 300, nBSV = 273
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1363
obj = -6.071466, rho = -0.366508
nSV = 252, nBSV = 190
*.*.*
optimization finished, #iter = 1572
obj = -5.956591, rho = -0.369005
nSV = 256, nBSV = 189
*..*
optimization finished, #iter = 2048
obj = -2.748981, rho = -0.373672
nSV = 270, nBSV = 92
*.*
optimization finished, #iter = 1334
obj = -5.997140, rho = -0.369017
nSV = 255, nBSV = 191
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1885
obj = -2.825156, rho = -0.370093
nSV = 260, nBSV = 90
*
optimization finished, #iter = 1575
obj = -2.012971, rho = -0.373887
nSV = 297, nBSV = 35
*
optimization finished, #iter = 639
obj = -10.605243, rho = -0.412103
nSV = 295, nBSV = 277
*
optimization finished, #iter = 651
obj = -10.599453, rho = -0.412135
nSV = 300, nBSV = 276
*.*
optimization finished, #iter = 1732
obj = -2.824267, rho = -0.370911
nSV = 252, nBSV = 85
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1274
obj = -6.046943, rho = -0.373431
nSV = 253, nBSV = 192
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1563
obj = -5.954101, rho = -0.366737
nSV = 250, nBSV = 188
*
optimization finished, #iter = 1670
obj = -2.066341, rho = -0.373912
nSV = 292, nBSV = 43
*.*.*
optimization finished, #iter = 1920
obj = -2.920294, rho = -0.371329
nSV = 259, nBSV = 94
*.*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1654
obj = -6.189725, rho = -0.373729
nSV = 263, nBSV = 197
*
optimization finished, #iter = 533
obj = -172.721975, rho = -1.649370
nSV = 422, nBSV = 414
*
optimization finished, #iter = 1794
obj = -2.077562, rho = -0.375049
nSV = 295, nBSV = 42
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1826
obj = -2.811957, rho = -0.369939
nSV = 256, nBSV = 87
*
optimization finished, #iter = 612
obj = -164.260230, rho = -1.703016
nSV = 412, nBSV = 403
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1429
obj = -5.957973, rho = -0.372260
nSV = 251, nBSV = 185
*.*
optimization finished, #iter = 1767
obj = -1.941010, rho = -0.374525
nSV = 288, nBSV = 34
*.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1592
obj = -6.285300, rho = -0.373209
nSV = 257, nBSV = 192
*
optimization finished, #iter = 1935
obj = -2.085495, rho = -0.374843
nSV = 295, nBSV = 38
*
optimization finished, #iter = 551
obj = -168.275668, rho = -1.615648
nSV = 416, nBSV = 407
*
optimization finished, #iter = 551
obj = -162.353098, rho = -1.614624
nSV = 408, nBSV = 399
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1866
obj = -2.878577, rho = -0.368451
nSV = 257, nBSV = 91
*.*
optimization finished, #iter = 1600
obj = -2.731404, rho = -0.369501
nSV = 239, nBSV = 81
*
optimization finished, #iter = 532
obj = -172.975840, rho = -1.648652
nSV = 422, nBSV = 414
*.*
optimization finished, #iter = 1905
obj = -1.975816, rho = -0.374237
nSV = 295, nBSV = 42
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1038
obj = -124.709920, rho = -1.068489
nSV = 329, nBSV = 315
*.*
optimization finished, #iter = 1735
obj = -1.990729, rho = -0.376542
nSV = 304, nBSV = 39
*.*
optimization finished, #iter = 586
obj = -165.604000, rho = -1.685946
nSV = 423, nBSV = 413
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1757
obj = -2.025945, rho = -0.374225
nSV = 295, nBSV = 41
*
optimization finished, #iter = 542
obj = -166.713643, rho = -1.557494
nSV = 401, nBSV = 394
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1174
obj = -123.846364, rho = -1.042538
nSV = 338, nBSV = 325
*
optimization finished, #iter = 450
obj = -164.728115, rho = -1.667654
nSV = 409, nBSV = 400
....
Warning: using -h 0 may be faster
*.*
optimization finished, #iter = 4364
obj = -86.869363, rho = -0.441592
nSV = 282, nBSV = 252
.*.
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*.
optimization finished, #iter = 1223
obj = -123.615367, rho = -1.068294
nSV = 334, nBSV = 320
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4916
obj = -85.823311, rho = -0.440036
nSV = 275, nBSV = 246
..
Warning: using -h 0 may be faster
*....*
optimization finished, #iter = 5203
obj = -85.351369, rho = -0.454288
nSV = 277, nBSV = 243
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1508
obj = -128.585127, rho = -1.106218
nSV = 352, nBSV = 335
*
optimization finished, #iter = 522
obj = -165.533517, rho = -1.470837
nSV = 407, nBSV = 400
.*
optimization finished, #iter = 546
obj = -165.191682, rho = -1.593941
nSV = 407, nBSV = 398
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1426
obj = -123.406957, rho = -1.087858
nSV = 338, nBSV = 324
....
Warning: using -h 0 may be faster
*........*.*
optimization finished, #iter = 11658
obj = -39.209082, rho = -0.355525
nSV = 250, nBSV = 163
..
Warning: using -h 0 may be faster
*.....*
optimization finished, #iter = 10158
obj = -38.811405, rho = -0.352493
nSV = 243, nBSV = 159
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1153
obj = -125.105323, rho = -1.029168
nSV = 336, nBSV = 320
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1418
obj = -121.675492, rho = -1.094662
nSV = 330, nBSV = 313
....
Warning: using -h 0 may be faster
.*.
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5259
obj = -90.548363, rho = -0.442002
nSV = 283, nBSV = 258
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4909
obj = -87.713034, rho = -0.445217
nSV = 282, nBSV = 247
.....*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1269
obj = -120.165026, rho = -1.067118
nSV = 324, nBSV = 307
.
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
*.*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5051
obj = -84.157442, rho = -0.445494
nSV = 279, nBSV = 242
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 7765
obj = -9.321187, rho = -0.375904
nSV = 232, nBSV = 36
*......*.*
optimization finished, #iter = 9394
obj = -8.337857, rho = -0.377869
nSV = 244, nBSV = 33
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1355
obj = -129.034607, rho = -1.062316
nSV = 346, nBSV = 325
.........
Warning: using -h 0 may be faster
*..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1259
obj = -121.816100, rho = -1.106802
nSV = 333, nBSV = 319
.
Warning: using -h 0 may be faster
.*......*.....*
optimization finished, #iter = 9629
obj = -38.972436, rho = -0.353851
nSV = 249, nBSV = 162
*
optimization finished, #iter = 11481
obj = -37.409831, rho = -0.353266
nSV = 255, nBSV = 159
....
Warning: using -h 0 may be faster
*....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 5592
obj = -87.155684, rho = -0.453602
nSV = 274, nBSV = 246
.
Warning: using -h 0 may be faster
.*..
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4436
obj = -88.068177, rho = -0.430494
nSV = 282, nBSV = 247
......*
optimization finished, #iter = 11020
obj = -38.197506, rho = -0.352704
nSV = 253, nBSV = 163
...
Warning: using -h 0 may be faster
.*..*.*
optimization finished, #iter = 9355
obj = -9.500683, rho = -0.376591
nSV = 246, nBSV = 37
.......
Warning: using -h 0 may be faster
*.*.*.....*
optimization finished, #iter = 8402
obj = -8.622574, rho = -0.376536
nSV = 242, nBSV = 34
*
optimization finished, #iter = 4660
obj = -3.944361, rho = -0.373740
nSV = 292, nBSV = 7
.
Warning: using -h 0 may be faster
.*....*
optimization finished, #iter = 9419
obj = -8.643499, rho = -0.377636
nSV = 247, nBSV = 34
*
optimization finished, #iter = 3304
obj = -2.839366, rho = -0.374953
nSV = 272, nBSV = 4
.......
Warning: using -h 0 may be faster
.*....
Warning: using -h 0 may be faster
**.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4145
obj = -85.400034, rho = -0.429371
nSV = 274, nBSV = 241
....*
optimization finished, #iter = 9783
obj = -37.636531, rho = -0.356508
nSV = 253, nBSV = 158
...
Warning: using -h 0 may be faster
*...........*
optimization finished, #iter = 9758
obj = -39.395585, rho = -0.364586
nSV = 251, nBSV = 165
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*..*......*
optimization finished, #iter = 9799
obj = -39.578521, rho = -0.363551
nSV = 254, nBSV = 166
....*
optimization finished, #iter = 4639
obj = -3.874577, rho = -0.374188
nSV = 286, nBSV = 5
*
optimization finished, #iter = 3940
obj = -3.947793, rho = -0.373371
nSV = 284, nBSV = 6
.*.*
optimization finished, #iter = 8872
obj = -8.866273, rho = -0.377112
nSV = 242, nBSV = 33
...
Warning: using -h 0 may be faster
*.......
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
*.
optimization finished, #iter = 1417
obj = -781.816868, rho = -3.120530
nSV = 414, nBSV = 402
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3813
obj = -86.170624, rho = -0.446993
nSV = 277, nBSV = 246
*
optimization finished, #iter = 9166
obj = -8.816787, rho = -0.372518
nSV = 248, nBSV = 32
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1583
obj = -742.809936, rho = -3.232852
nSV = 404, nBSV = 392
........
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
*.*.
Warning: using -h 0 may be faster
*..*.....*
optimization finished, #iter = 3633
obj = -4.017472, rho = -0.374594
nSV = 280, nBSV = 7
.*.
optimization finished, #iter = 3619
obj = -3.479355, rho = -0.374553
nSV = 284, nBSV = 4
.*.
optimization finished, #iter = 4031
obj = -4.022281, rho = -0.374554
nSV = 278, nBSV = 6
*.*
optimization finished, #iter = 9508
obj = -8.171188, rho = -0.374954
nSV = 249, nBSV = 30
.
Warning: using -h 0 may be faster
.*...*.
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
**
optimization finished, #iter = 1418
obj = -733.123433, rho = -3.059735
nSV = 399, nBSV = 388
optimization finished, #iter = 1121
.obj = -761.027911, rho = -2.944929
nSV = 400, nBSV = 386
*.*
optimization finished, #iter = 8468
obj = -40.794499, rho = -0.357721
nSV = 252, nBSV = 164
......
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1228
..obj = -782.964564, rho = -3.125813
nSV = 405, nBSV = 393
.*
optimization finished, #iter = 8485
obj = -37.855556, rho = -0.359777
nSV = 244, nBSV = 153
..*
optimization finished, #iter = 7799
obj = -8.834722, rho = -0.374902
nSV = 237, nBSV = 35
.
Warning: using -h 0 may be faster
.*..*
optimization finished, #iter = 3315
obj = -3.829564, rho = -0.376261
nSV = 293, nBSV = 6
.
Warning: using -h 0 may be faster
*....*
optimization finished, #iter = 4750
obj = -3.733763, rho = -0.374582
nSV = 279, nBSV = 6
Warning: using -h 0 may be faster
.*...*
optimization finished, #iter = 3486
obj = -3.979689, rho = -0.373805
nSV = 295, nBSV = 6
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1232
obj = -756.635008, rho = -3.025359
nSV = 395, nBSV = 383
.
Warning: using -h 0 may be faster
**.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1286
obj = -746.657563, rho = -3.254865
nSV = 408, nBSV = 394
..
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4797
obj = -564.576921, rho = -1.530398
nSV = 319, nBSV = 299
..*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1472
obj = -745.589069, rho = -3.047800
nSV = 403, nBSV = 392
.*
optimization finished, #iter = 8465
obj = -9.159835, rho = -0.373104
nSV = 240, nBSV = 37
....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2926
obj = -565.211895, rho = -1.489009
nSV = 326, nBSV = 306
......
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1390
obj = -750.957974, rho = -2.833709
nSV = 401, nBSV = 389
..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1440
obj = -747.132705, rho = -3.066507
nSV = 400, nBSV = 386
....
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4836
obj = -579.778145, rho = -1.525225
nSV = 338, nBSV = 316
...
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4046
obj = -557.251350, rho = -1.494045
nSV = 320, nBSV = 302
...........
Warning: using -h 0 may be faster
...*.........
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 17921
obj = -381.228061, rho = -0.377898
nSV = 278, nBSV = 235
*........*..*
optimization finished, #iter = 15437
obj = -379.602299, rho = -0.409936
nSV = 268, nBSV = 228
...
Warning: using -h 0 may be faster
*..........*
optimization finished, #iter = 16001
obj = -380.134737, rho = -0.369709
nSV = 267, nBSV = 226
................
Warning: using -h 0 may be faster
.*.....
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3967
obj = -549.478186, rho = -1.529538
nSV = 314, nBSV = 295
........
Warning: using -h 0 may be faster
*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4917
obj = -565.779132, rho = -1.504186
nSV = 326, nBSV = 305
.....*........*....................*....*
optimization finished, #iter = 17430
obj = -17.818197, rho = -0.375401
nSV = 245, nBSV = 13
..........*......
Warning: using -h 0 may be faster
.....*
optimization finished, #iter = 17633
obj = -20.013562, rho = -0.376532
nSV = 238, nBSV = 15
.....
Warning: using -h 0 may be faster
*.*..
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 3672
obj = -557.243302, rho = -1.479429
nSV = 319, nBSV = 300
.*
optimization finished, #iter = 51272
obj = -147.154800, rho = -0.324166
nSV = 242, nBSV = 133
.......*.......................
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3894
obj = -583.265444, rho = -1.485609
nSV = 326, nBSV = 307
.........
Warning: using -h 0 may be faster
*.............*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17624
obj = -387.508775, rho = -0.411724
nSV = 273, nBSV = 232
..................
Warning: using -h 0 may be faster
.*........*.....
Warning: using -h 0 may be faster
.*...*
optimization finished, #iter = 52273
.obj = -151.360961, rho = -0.332503
nSV = 245, nBSV = 138
.....
Warning: using -h 0 may be faster
.....*
optimization finished, #iter = 18236
obj = -370.671603, rho = -0.401884
nSV = 270, nBSV = 230
......*.*
optimization finished, #iter = 18735
Warning: using -h 0 may be faster
obj = -398.666117, rho = -0.399230
nSV = 274, nBSV = 239
*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 4454
obj = -541.985879, rho = -1.480543
nSV = 308, nBSV = 289
.............*..............
Warning: using -h 0 may be faster
*.*..*
optimization finished, #iter = 20021
obj = -20.273042, rho = -0.376711
nSV = 244, nBSV = 13
...............
Warning: using -h 0 may be faster
..*..*..*
optimization finished, #iter = 22004
obj = -18.529336, rho = -0.376776
nSV = 238, nBSV = 11
..*..
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17577
obj = -17.836657, rho = -0.379399
nSV = 231, nBSV = 10
.....*
optimization finished, #iter = 6761
obj = -8.211409, rho = -0.375379
nSV = 294, nBSV = 3
.........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 3132
obj = -550.984679, rho = -1.527228
nSV = 317, nBSV = 297
.........................
Warning: using -h 0 may be faster
..*...............
Warning: using -h 0 may be faster
.*.*.....*
optimization finished, #iter = 15633
obj = -381.250157, rho = -0.410354
nSV = 267, nBSV = 228
..........
Warning: using -h 0 may be faster
*...................*
optimization finished, #iter = 18004
obj = -391.164242, rho = -0.386823
nSV = 280, nBSV = 240
.....................*
optimization finished, #iter = 6100
obj = -8.104388, rho = -0.375950
nSV = 285, nBSV = 3
....*..*..
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 50004
.obj = -140.620124, rho = -0.332156
nSV = 250, nBSV = 127
........*...*.........
Warning: using -h 0 may be faster
..*.......*......*
optimization finished, #iter = 4343
obj = -4.238892, rho = -0.375709
nSV = 272, nBSV = 1
....*........*...............*.
optimization finished, #iter = 19387
obj = -19.453774, rho = -0.375698
nSV = 240, nBSV = 11
....*..............*
optimization finished, #iter = 22040
obj = -19.553129, rho = -0.378863
nSV = 234, nBSV = 12
...........*..*..
Warning: using -h 0 may be faster
...*.*
optimization finished, #iter = 44499
obj = -148.653846, rho = -0.318207
nSV = 243, nBSV = 125
........*.
optimization finished, #iter = 6550
obj = -8.275712, rho = -0.376678
nSV = 291, nBSV = 4
........................
Warning: using -h 0 may be faster
..*.*......*
optimization finished, #iter = 54505
obj = -143.886009, rho = -0.329159
nSV = 250, nBSV = 132
..*.*
optimization finished, #iter = 5210
obj = -8.253266, rho = -0.375243
nSV = 289, nBSV = 4
.....*.......
Warning: using -h 0 may be faster
*.*
optimization finished, #iter = 18561
obj = -14.031212, rho = -0.375167
nSV = 235, nBSV = 7
....
Warning: using -h 0 may be faster
...*..*....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 17814
obj = -379.894165, rho = -0.360453
nSV = 270, nBSV = 234
............*
optimization finished, #iter = 6300
obj = -6.403978, rho = -0.374363
nSV = 286, nBSV = 2
*..........
Warning: using -h 0 may be faster
*..........*
optimization finished, #iter = 15226
obj = -378.120639, rho = -0.355292
nSV = 271, nBSV = 230
................
Warning: using -h 0 may be faster
..*............*..*
optimization finished, #iter = 19786
obj = -18.957277, rho = -0.373519
nSV = 230, nBSV = 12
*.......
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 6573
obj = -7.735303, rho = -0.377177
nSV = 286, nBSV = 4
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2271
obj = -1496.878480, rho = -3.267900
nSV = 395, nBSV = 382
*..................*
optimization finished, #iter = 6793
obj = -8.225162, rho = -0.376974
nSV = 292, nBSV = 4
.......................*..
Warning: using -h 0 may be faster
Warning: using -h 0 may be faster
*
optimization finished, #iter = 49164
obj = -145.198444, rho = -0.328296
nSV = 237, nBSV = 127
.*..*.
optimization finished, #iter = 5062
obj = -8.012554, rho = -0.377944
nSV = 294, nBSV = 4
.........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2199
.obj = -1419.671101, rho = -3.426234
nSV = 383, nBSV = 370
..*.*............
Warning: using -h 0 may be faster
*
optimization finished, #iter = 1862
obj = -1456.486637, rho = -3.146674
nSV = 377, nBSV = 363
......................*
optimization finished, #iter = 46877
obj = -152.773153, rho = -0.346553
nSV = 248, nBSV = 137
..*.
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2125
obj = -1401.882575, rho = -3.328296
nSV = 381, nBSV = 367
*
optimization finished, #iter = 18176
obj = -19.667910, rho = -0.372951
nSV = 232, nBSV = 12
...........
Warning: using -h 0 may be faster
*..........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2127
obj = -1500.364701, rho = -3.207050
nSV = 390, nBSV = 375
.........*....*........
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2145
obj = -1448.364398, rho = -3.210599
nSV = 373, nBSV = 359
..........*.
optimization finished, #iter = 11662
obj = -7.770484, rho = -0.375078
nSV = 290, nBSV = 2
....
Warning: using -h 0 may be faster
*...*
optimization finished, #iter = 8861
obj = -1093.315966, rho = -1.489447
nSV = 325, nBSV = 305
..*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 7436
obj = -1091.332876, rho = -1.558007
nSV = 313, nBSV = 290
....*
optimization finished, #iter = 43264
obj = -148.078302, rho = -0.342942
nSV = 245, nBSV = 132
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2348
obj = -1426.580234, rho = -3.433941
nSV = 385, nBSV = 373
.....
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2065
obj = -1427.837573, rho = -3.363091
nSV = 382, nBSV = 368
......
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 2217
.obj = -1438.211060, rho = -3.174569
nSV = 380, nBSV = 364
............
Warning: using -h 0 may be faster
*.....
Warning: using -h 0 may be faster
*
optimization finished, #iter = 8257
obj = -1077.019569, rho = -1.481852
nSV = 311, nBSV = 293
...............*..............
Warning: using -h 0 may be faster
*..
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 8140
.obj = -1116.937668, rho = -1.501907
nSV = 332, nBSV = 312
*
optimization finished, #iter = 7004
.obj = -1076.212622, rho = -1.468752
nSV = 317, nBSV = 296
..
Warning: using -h 0 may be faster
*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 2385
obj = -1429.693474, rho = -3.313559
nSV = 380, nBSV = 364
.
Warning: using -h 0 may be faster
.*......................*.
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 33690
obj = -727.220429, rho = -0.309744
nSV = 273, nBSV = 236
..............*........*
optimization finished, #iter = 48194
obj = -145.895064, rho = -0.356052
nSV = 244, nBSV = 129
........................*....................................
Warning: using -h 0 may be faster
*...
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 7271
obj = -1048.833270, rho = -1.474960
nSV = 302, nBSV = 284
...........
Warning: using -h 0 may be faster
*......
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 7365
obj = -1062.041219, rho = -1.560079
nSV = 308, nBSV = 290
.......
Warning: using -h 0 may be faster
*.*...*
optimization finished, #iter = 47039
.obj = -154.752861, rho = -0.332239
nSV = 254, nBSV = 142
..
Warning: using -h 0 may be faster
*..............*.....*
optimization finished, #iter = 28058
obj = -726.522959, rho = -0.215324
nSV = 264, nBSV = 225
.
Warning: using -h 0 may be faster
*..*
optimization finished, #iter = 6233
obj = -1092.614605, rho = -1.463152
nSV = 316, nBSV = 295
.............*..*
optimization finished, #iter = 36135
obj = -725.272862, rho = -0.380602
nSV = 262, nBSV = 224
........................*.......................................................*.................*
optimization finished, #iter = 24622
obj = -25.535165, rho = -0.377271
nSV = 242, nBSV = 7
....*..
Warning: using -h 0 may be faster
.*.
Warning: using -h 0 may be faster
*
optimization finished, #iter = 6216
obj = -1063.622422, rho = -1.537629
nSV = 316, nBSV = 293
................*.......*.....
Warning: using -h 0 may be faster
*......
Warning: using -h 0 may be faster
*.
optimization finished, #iter = 7169
.obj = -1125.857431, rho = -1.476289
nSV = 320, nBSV = 301
...............................
Warning: using -h 0 may be faster
*...............................
Warning: using -h 0 may be faster
....*...*
optimization finished, #iter = 36848
obj = -740.576317, rho = -0.330154
nSV = 265, nBSV = 230
......*................*..........*...........*
optimization finished, #iter = 21983
.obj = -28.146993, rho = -0.378876
nSV = 243, nBSV = 8
......*
optimization finished, #iter = 87737
obj = -269.911009, rho = -0.352734
nSV = 250, nBSV = 129
..........*.....*....*
optimization finished, #iter = 82787
obj = -262.226135, rho = -0.333049
nSV = 238, nBSV = 117
.....................*..*
optimization finished, #iter = 27006
obj = -29.103188, rho = -0.377792
nSV = 245, nBSV = 9
..........................*...*...........................................*.......
Warning: using -h 0 may be faster
..*.......................*.*.
optimization finished, #iter = 34080
obj = -708.306754, rho = -0.325082
nSV = 268, nBSV = 225
*.*...*...
Warning: using -h 0 may be faster
*
optimization finished, #iter = 25385
obj = -28.286967, rho = -0.376228
nSV = 240, nBSV = 9
......*
optimization finished, #iter = 22880
obj = -26.767087, rho = -0.379929
nSV = 243, nBSV = 8
......
Warning: using -h 0 may be faster
*.............................*
optimization finished, #iter = 38192
*obj = -760.301765, rho = -0.347091
nSV = 272, nBSV = 234
......
Warning: using -h 0 may be faster
..*
optimization finished, #iter = 24052
obj = -29.966073, rho = -0.379010
nSV = 236, nBSV = 8
.........................*..*............
Warning: using -h 0 may be faster
*................*...*
optimization finished, #iter = 28597
obj = -18.576622, rho = -0.375367
nSV = 229, nBSV = 5
....*.*
optimization finished, #iter = 38049
obj = -724.243019, rho = -0.307888
nSV = 260, nBSV = 217
................................*..
Warning: using -h 0 may be faster
...*
optimization finished, #iter = 26957
obj = -29.154822, rho = -0.375971
nSV = 231, nBSV = 8
.....................*.....
Warning: using -h 0 may be faster
..*.............................
Warning: using -h 0 may be faster
....**...*.
optimization finished, #iter = 7856
obj = -8.692063, rho = -0.375289
nSV = 290, nBSV = 2
..*..........*.....................*
optimization finished, #iter = 6789
obj = -11.322358, rho = -0.379091
nSV = 295, nBSV = 3
..................*...*................*
optimization finished, #iter = 80036
obj = -265.990225, rho = -0.325770
nSV = 237, nBSV = 117
.............*.....
Warning: using -h 0 may be faster
.*
optimization finished, #iter = 25851
obj = -29.674430, rho = -0.380024
nSV = 226, nBSV = 8
.............*....*
optimization finished, #iter = 27559
obj = -30.259796, rho = -0.378443
.nSV = 246, nBSV = 9
.................................
Warning: using -h 0 may be faster
.*.*......
Warning: using -h 0 may be faster
..*.................*......*
optimization finished, #iter = 7025
obj = -11.592746, rho = -0.376716
nSV = 287, nBSV = 4
...*.
optimization finished, #iter = 93424
obj = -250.642086, rho = -0.341229
nSV = 229, nBSV = 119
.............................
Warning: using -h 0 may be faster
..
Warning: using -h 0 may be faster
*.*.
Warning: using -h 0 may be faster
.......*.............*
optimization finished, #iter = 4332
.obj = -5.361557, rho = -0.376385
nSV = 268, nBSV = 1
....*.*...*
optimization finished, #iter = 36080
obj = -748.777770, rho = -0.273011
nSV = 277, nBSV = 235
....*
optimization finished, #iter = 18456
obj = -11.715874, rho = -0.377330
nSV = 290, nBSV = 2
........*.*
optimization finished, #iter = 9816
obj = -10.860997, rho = -0.377491
nSV = 290, nBSV = 3
.........*......*..................
Warning: using -h 0 may be faster
*.....................*
optimization finished, #iter = 33750
obj = -723.344636, rho = -0.260715
nSV = 268, nBSV = 230
..........*.......*.................*.
optimization finished, #iter = 15420
obj = -11.715147, rho = -0.375906
nSV = 286, nBSV = 2
..........
Warning: using -h 0 may be faster
......*.....*
optimization finished, #iter = 14759
obj = -11.727731, rho = -0.375397
nSV = 282, nBSV = 2
........................*...
Warning: using -h 0 may be faster
*...*
optimization finished, #iter = 9832
obj = -11.675405, rho = -0.377594
nSV = 283, nBSV = 2
......*
optimization finished, #iter = 98437
obj = -254.159996, rho = -0.333277
nSV = 240, nBSV = 119
..................*...*
optimization finished, #iter = 16771
obj = -10.655115, rho = -0.375730
nSV = 292, nBSV = 2
.................
Warning: using -h 0 may be faster
*...................*
optimization finished, #iter = 31795
obj = -722.675491, rho = -0.254995
nSV = 266, nBSV = 223
.............................*........................................................*...*..................*
optimization finished, #iter = 79108
obj = -273.132858, rho = -0.356526
nSV = 247, nBSV = 124
................................*..................*
optimization finished, #iter = 89745
obj = -260.052190, rho = -0.339371
nSV = 241, nBSV = 120
....................*..........................................................*.....*
optimization finished, #iter = 87631
obj = -262.841829, rho = -0.344771
nSV = 245, nBSV = 131
...................................*..........................................................*.............*
optimization finished, #iter = 87592
obj = -262.461382, rho = -0.377731
nSV = 243, nBSV = 122
....*.................................................................*...*
optimization finished, #iter = 99830
obj = -274.677412, rho = -0.335479
nSV = 255, nBSV = 130
[LibSVM].......................*...................................*.............*
optimization finished, #iter = 70762
obj = -163.579109, rho = -0.339624
nSV = 264, nBSV = 151
Best parameters for RBF kernel: {'C': 50, 'gamma': 10}
Accuracy with the best RBF model: 88.75%
model = SVR(kernel = 'rbf',C=10,gamma=1)
# Train the model on the training data
model.fit(X_train, y_train)
# Make predictions on the testing data
y_pred = model.predict(X_test)
# Evaluate the model
test_mse = mean_squared_error(y_test, y_pred)
test_r2 = r2_score(y_test, y_pred)
print(f'Test Mean Squared Error: {test_mse}')
print(f'Test R-squared: {test_r2}')
Test Mean Squared Error: 0.0038995784534656534 Test R-squared: 0.8727087538040207
import matplotlib.pyplot as plt
# Plot actual vs predicted
plt.figure(figsize=(25, 12)) # Enlarge the plot
plt.plot(y_pred, color='blue', label='Predicted')
plt.plot(y_test, color='red', label='Actual') # Plotting the line y=x for reference
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
# Set xlim and ylim
plt.xlim(0, 500)
plt.ylim(0, 1)
plt.show()
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()
target = 'mental_disorders'
# Model Building: SVR with RBF kernel
lm_model = LinearRegression()
lm_model.fit(X_train, y_train)
# Model Evaluation
y_pred = lm_model.predict(X_test)
mse = mean_squared_error(y_test, y_pred)
r2 = r2_score(y_test, y_pred)
print(f'Mean Squared Error: {mse}')
print(f'R-squared: {r2}')
plt.figure(figsize=(25, 12))
plt.plot(y_pred, color='blue', label='Predicted')
plt.plot(y_test, color='red', label='Actual') # Plotting the line y=x for reference
plt.xlabel('Actual Values')
plt.ylabel('Predicted Values')
plt.title('Actual vs Predicted Values')
plt.legend()
# Plotting residuals
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()
Mean Squared Error: 0.003954179689872241 R-squared: 0.8709264433545774
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
from keras.callbacks import ModelCheckpoint, EarlyStopping,ReduceLROnPlateau
import numpy as np
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Bidirectional, Dense, Dropout
# Assuming merged_numeric_scaled is the standardized feature data
# Extract the target variable before performing PCA
target_column = 'mental_disorders'
y = merged_df[target_column].values
# Drop the target column from the feature data
X = merged_df.drop(columns=[target_column,'Entity','Code_x','Code_y']).values
# Perform PCA to reduce to 4 components
pca = PCA(n_components=4)
pca_out = pca.fit_transform(X)
# Function to create sequences
def create_sequences(data, target, n_timesteps):
sequences = []
targets = []
for i in range(len(data) - n_timesteps + 1):
seq = data[i:i + n_timesteps]
sequences.append(seq)
targets.append(target[i + n_timesteps - 1])
return np.array(sequences), np.array(targets)
# Assuming each sample should be a sequence of 10 timesteps
n_timesteps = 10
n_features = pca_out.shape[1]
# Create sequences
X_seq, y_seq = create_sequences(pca_out, y, n_timesteps)
# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X_seq, y_seq, test_size=0.3, random_state=42)
# Print the number of features
print(n_features)
# Create the RNN model with Bidirectional LSTM layers for regression task
model2 = Sequential()
# Bidirectional LSTM layer with Dropout regularization
model2.add(Bidirectional(LSTM(units=64, return_sequences=True, input_shape=(n_timesteps, n_features))))
model2.add(Dropout(0.3))
# Second Bidirectional LSTM layer
model2.add(Bidirectional(LSTM(units=64, return_sequences=True)))
model2.add(Dropout(0.3))
# Third Bidirectional LSTM layer
model2.add(Bidirectional(LSTM(units=64)))
model2.add(Dropout(0.3))
# Output layer for regression task
model2.add(Dense(units=1))
# Compile the model with rmsprop optimizer
model2.compile(optimizer='adam', loss='mse', metrics=['mae'])
# Summary of the model
model2.summary()
# Early stopping callback
early_stopping = EarlyStopping(monitor='val_loss', patience=10, restore_best_weights=True)
# Reduce learning rate on plateau callback
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, patience=5, min_lr=0.001)
# Train the model
history = model2.fit(X_train, y_train, epochs=100, batch_size=32, validation_data=(X_test, y_test), callbacks=[early_stopping, reduce_lr])
# Plot training & validation metrics
plt.figure(figsize=(12, 6))
# Plot loss
plt.subplot(1, 2, 1)
plt.plot(history.history['loss'], label='Training Loss')
plt.plot(history.history['val_loss'], label='Validation Loss')
plt.title('Model Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
# Plot mean absolute error
plt.subplot(1, 2, 2)
plt.plot(history.history['mae'], label='Training MAE')
plt.plot(history.history['val_mae'], label='Validation MAE')
plt.title('Model Mean Absolute Error')
plt.xlabel('Epoch')
plt.ylabel('Mean Absolute Error')
plt.legend()
plt.show()
# Make predictions
y_pred = model2.predict(X_test)
# Plot actual vs. predicted values using a line plot
plt.figure(figsize=(10, 6))
plt.plot(range(len(y_test)), y_test, label='Actual Values', linestyle='-', marker='o')
plt.plot(range(len(y_test)), y_pred, label='Predicted Values', linestyle='-', marker='x')
plt.title('Actual vs. Predicted Values')
plt.xlabel('Sample Index')
plt.ylabel('Mental Disorders')
plt.legend()
plt.show()
4
/Applications/Anaconda/anaconda3/lib/python3.11/site-packages/keras/src/layers/rnn/rnn.py:204: UserWarning: Do not pass an `input_shape`/`input_dim` argument to a layer. When using Sequential models, prefer using an `Input(shape)` object as the first layer in the model instead. super().__init__(**kwargs)
Model: "sequential"
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━┓ ┃ Layer (type) ┃ Output Shape ┃ Param # ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━┩ │ bidirectional (Bidirectional) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout (Dropout) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ bidirectional_1 (Bidirectional) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_1 (Dropout) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ bidirectional_2 (Bidirectional) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dropout_2 (Dropout) │ ? │ 0 (unbuilt) │ ├─────────────────────────────────┼────────────────────────┼───────────────┤ │ dense (Dense) │ ? │ 0 (unbuilt) │ └─────────────────────────────────┴────────────────────────┴───────────────┘
Total params: 0 (0.00 B)
Trainable params: 0 (0.00 B)
Non-trainable params: 0 (0.00 B)
Epoch 1/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 4s 16ms/step - loss: 0.0424 - mae: 0.1527 - val_loss: 0.0145 - val_mae: 0.0981 - learning_rate: 0.0010 Epoch 2/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0138 - mae: 0.0875 - val_loss: 0.0108 - val_mae: 0.0802 - learning_rate: 0.0010 Epoch 3/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0120 - mae: 0.0813 - val_loss: 0.0103 - val_mae: 0.0744 - learning_rate: 0.0010 Epoch 4/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 21ms/step - loss: 0.0110 - mae: 0.0779 - val_loss: 0.0092 - val_mae: 0.0705 - learning_rate: 0.0010 Epoch 5/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0099 - mae: 0.0742 - val_loss: 0.0100 - val_mae: 0.0787 - learning_rate: 0.0010 Epoch 6/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0101 - mae: 0.0755 - val_loss: 0.0087 - val_mae: 0.0665 - learning_rate: 0.0010 Epoch 7/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0097 - mae: 0.0729 - val_loss: 0.0084 - val_mae: 0.0630 - learning_rate: 0.0010 Epoch 8/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 3s 22ms/step - loss: 0.0091 - mae: 0.0701 - val_loss: 0.0081 - val_mae: 0.0638 - learning_rate: 0.0010 Epoch 9/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0089 - mae: 0.0708 - val_loss: 0.0092 - val_mae: 0.0724 - learning_rate: 0.0010 Epoch 10/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0092 - mae: 0.0712 - val_loss: 0.0081 - val_mae: 0.0621 - learning_rate: 0.0010 Epoch 11/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0087 - mae: 0.0688 - val_loss: 0.0082 - val_mae: 0.0637 - learning_rate: 0.0010 Epoch 12/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0084 - mae: 0.0681 - val_loss: 0.0082 - val_mae: 0.0669 - learning_rate: 0.0010 Epoch 13/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0086 - mae: 0.0693 - val_loss: 0.0080 - val_mae: 0.0629 - learning_rate: 0.0010 Epoch 14/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0088 - mae: 0.0685 - val_loss: 0.0078 - val_mae: 0.0640 - learning_rate: 0.0010 Epoch 15/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0085 - mae: 0.0670 - val_loss: 0.0080 - val_mae: 0.0661 - learning_rate: 0.0010 Epoch 16/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0084 - mae: 0.0675 - val_loss: 0.0078 - val_mae: 0.0633 - learning_rate: 0.0010 Epoch 17/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0083 - mae: 0.0668 - val_loss: 0.0076 - val_mae: 0.0613 - learning_rate: 0.0010 Epoch 18/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0079 - mae: 0.0650 - val_loss: 0.0079 - val_mae: 0.0655 - learning_rate: 0.0010 Epoch 19/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0083 - mae: 0.0663 - val_loss: 0.0076 - val_mae: 0.0617 - learning_rate: 0.0010 Epoch 20/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0641 - val_loss: 0.0086 - val_mae: 0.0643 - learning_rate: 0.0010 Epoch 21/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0081 - mae: 0.0652 - val_loss: 0.0079 - val_mae: 0.0640 - learning_rate: 0.0010 Epoch 22/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0636 - val_loss: 0.0078 - val_mae: 0.0638 - learning_rate: 0.0010 Epoch 23/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0081 - mae: 0.0655 - val_loss: 0.0075 - val_mae: 0.0607 - learning_rate: 0.0010 Epoch 24/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0076 - mae: 0.0625 - val_loss: 0.0073 - val_mae: 0.0589 - learning_rate: 0.0010 Epoch 25/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0074 - mae: 0.0615 - val_loss: 0.0075 - val_mae: 0.0597 - learning_rate: 0.0010 Epoch 26/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0082 - mae: 0.0646 - val_loss: 0.0075 - val_mae: 0.0610 - learning_rate: 0.0010 Epoch 27/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0078 - mae: 0.0643 - val_loss: 0.0079 - val_mae: 0.0633 - learning_rate: 0.0010 Epoch 28/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0639 - val_loss: 0.0078 - val_mae: 0.0640 - learning_rate: 0.0010 Epoch 29/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0078 - mae: 0.0641 - val_loss: 0.0074 - val_mae: 0.0590 - learning_rate: 0.0010 Epoch 30/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0076 - mae: 0.0631 - val_loss: 0.0074 - val_mae: 0.0625 - learning_rate: 0.0010 Epoch 31/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0075 - mae: 0.0631 - val_loss: 0.0081 - val_mae: 0.0678 - learning_rate: 0.0010 Epoch 32/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0075 - mae: 0.0641 - val_loss: 0.0077 - val_mae: 0.0606 - learning_rate: 0.0010 Epoch 33/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0077 - mae: 0.0636 - val_loss: 0.0073 - val_mae: 0.0591 - learning_rate: 0.0010 Epoch 34/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0614 - val_loss: 0.0075 - val_mae: 0.0590 - learning_rate: 0.0010 Epoch 35/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0595 - val_loss: 0.0074 - val_mae: 0.0633 - learning_rate: 0.0010 Epoch 36/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0612 - val_loss: 0.0070 - val_mae: 0.0576 - learning_rate: 0.0010 Epoch 37/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0611 - val_loss: 0.0073 - val_mae: 0.0608 - learning_rate: 0.0010 Epoch 38/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0068 - mae: 0.0601 - val_loss: 0.0076 - val_mae: 0.0633 - learning_rate: 0.0010 Epoch 39/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0614 - val_loss: 0.0072 - val_mae: 0.0618 - learning_rate: 0.0010 Epoch 40/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0074 - mae: 0.0618 - val_loss: 0.0068 - val_mae: 0.0587 - learning_rate: 0.0010 Epoch 41/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0602 - val_loss: 0.0071 - val_mae: 0.0623 - learning_rate: 0.0010 Epoch 42/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0071 - mae: 0.0603 - val_loss: 0.0069 - val_mae: 0.0579 - learning_rate: 0.0010 Epoch 43/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0065 - mae: 0.0579 - val_loss: 0.0067 - val_mae: 0.0583 - learning_rate: 0.0010 Epoch 44/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0064 - mae: 0.0567 - val_loss: 0.0067 - val_mae: 0.0552 - learning_rate: 0.0010 Epoch 45/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0069 - mae: 0.0593 - val_loss: 0.0065 - val_mae: 0.0557 - learning_rate: 0.0010 Epoch 46/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0065 - mae: 0.0576 - val_loss: 0.0067 - val_mae: 0.0585 - learning_rate: 0.0010 Epoch 47/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0066 - mae: 0.0574 - val_loss: 0.0060 - val_mae: 0.0533 - learning_rate: 0.0010 Epoch 48/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0063 - mae: 0.0567 - val_loss: 0.0061 - val_mae: 0.0561 - learning_rate: 0.0010 Epoch 49/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 18ms/step - loss: 0.0059 - mae: 0.0555 - val_loss: 0.0061 - val_mae: 0.0564 - learning_rate: 0.0010 Epoch 50/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 20ms/step - loss: 0.0061 - mae: 0.0552 - val_loss: 0.0064 - val_mae: 0.0549 - learning_rate: 0.0010 Epoch 51/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0544 - val_loss: 0.0062 - val_mae: 0.0557 - learning_rate: 0.0010 Epoch 52/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0057 - mae: 0.0547 - val_loss: 0.0058 - val_mae: 0.0533 - learning_rate: 0.0010 Epoch 53/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0546 - val_loss: 0.0060 - val_mae: 0.0525 - learning_rate: 0.0010 Epoch 54/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0057 - mae: 0.0535 - val_loss: 0.0058 - val_mae: 0.0503 - learning_rate: 0.0010 Epoch 55/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0061 - mae: 0.0546 - val_loss: 0.0053 - val_mae: 0.0492 - learning_rate: 0.0010 Epoch 56/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0058 - mae: 0.0527 - val_loss: 0.0055 - val_mae: 0.0505 - learning_rate: 0.0010 Epoch 57/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0054 - mae: 0.0523 - val_loss: 0.0058 - val_mae: 0.0524 - learning_rate: 0.0010 Epoch 58/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0050 - mae: 0.0508 - val_loss: 0.0067 - val_mae: 0.0558 - learning_rate: 0.0010 Epoch 59/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0059 - mae: 0.0541 - val_loss: 0.0059 - val_mae: 0.0547 - learning_rate: 0.0010 Epoch 60/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0053 - mae: 0.0517 - val_loss: 0.0055 - val_mae: 0.0521 - learning_rate: 0.0010 Epoch 61/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0056 - mae: 0.0527 - val_loss: 0.0048 - val_mae: 0.0458 - learning_rate: 0.0010 Epoch 62/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0050 - mae: 0.0501 - val_loss: 0.0052 - val_mae: 0.0495 - learning_rate: 0.0010 Epoch 63/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0051 - mae: 0.0505 - val_loss: 0.0054 - val_mae: 0.0493 - learning_rate: 0.0010 Epoch 64/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0052 - mae: 0.0506 - val_loss: 0.0056 - val_mae: 0.0513 - learning_rate: 0.0010 Epoch 65/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0052 - mae: 0.0513 - val_loss: 0.0049 - val_mae: 0.0463 - learning_rate: 0.0010 Epoch 66/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0486 - val_loss: 0.0053 - val_mae: 0.0505 - learning_rate: 0.0010 Epoch 67/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0489 - val_loss: 0.0053 - val_mae: 0.0484 - learning_rate: 0.0010 Epoch 68/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0045 - mae: 0.0475 - val_loss: 0.0049 - val_mae: 0.0502 - learning_rate: 0.0010 Epoch 69/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0485 - val_loss: 0.0048 - val_mae: 0.0475 - learning_rate: 0.0010 Epoch 70/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0486 - val_loss: 0.0046 - val_mae: 0.0456 - learning_rate: 0.0010 Epoch 71/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0046 - mae: 0.0480 - val_loss: 0.0045 - val_mae: 0.0468 - learning_rate: 0.0010 Epoch 72/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0444 - val_loss: 0.0045 - val_mae: 0.0465 - learning_rate: 0.0010 Epoch 73/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0044 - mae: 0.0458 - val_loss: 0.0042 - val_mae: 0.0447 - learning_rate: 0.0010 Epoch 74/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0045 - mae: 0.0465 - val_loss: 0.0057 - val_mae: 0.0518 - learning_rate: 0.0010 Epoch 75/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0458 - val_loss: 0.0051 - val_mae: 0.0478 - learning_rate: 0.0010 Epoch 76/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0488 - val_loss: 0.0046 - val_mae: 0.0454 - learning_rate: 0.0010 Epoch 77/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0048 - mae: 0.0470 - val_loss: 0.0043 - val_mae: 0.0469 - learning_rate: 0.0010 Epoch 78/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0040 - mae: 0.0452 - val_loss: 0.0042 - val_mae: 0.0431 - learning_rate: 0.0010 Epoch 79/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0047 - mae: 0.0484 - val_loss: 0.0039 - val_mae: 0.0414 - learning_rate: 0.0010 Epoch 80/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0044 - mae: 0.0468 - val_loss: 0.0049 - val_mae: 0.0470 - learning_rate: 0.0010 Epoch 81/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0455 - val_loss: 0.0042 - val_mae: 0.0449 - learning_rate: 0.0010 Epoch 82/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0042 - mae: 0.0456 - val_loss: 0.0037 - val_mae: 0.0423 - learning_rate: 0.0010 Epoch 83/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0039 - mae: 0.0442 - val_loss: 0.0044 - val_mae: 0.0458 - learning_rate: 0.0010 Epoch 84/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0445 - val_loss: 0.0040 - val_mae: 0.0427 - learning_rate: 0.0010 Epoch 85/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0038 - mae: 0.0433 - val_loss: 0.0043 - val_mae: 0.0434 - learning_rate: 0.0010 Epoch 86/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0041 - mae: 0.0446 - val_loss: 0.0044 - val_mae: 0.0463 - learning_rate: 0.0010 Epoch 87/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0039 - mae: 0.0437 - val_loss: 0.0040 - val_mae: 0.0423 - learning_rate: 0.0010 Epoch 88/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0037 - mae: 0.0426 - val_loss: 0.0039 - val_mae: 0.0430 - learning_rate: 0.0010 Epoch 89/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0037 - mae: 0.0429 - val_loss: 0.0041 - val_mae: 0.0428 - learning_rate: 0.0010 Epoch 90/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0425 - val_loss: 0.0037 - val_mae: 0.0418 - learning_rate: 0.0010 Epoch 91/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0417 - val_loss: 0.0037 - val_mae: 0.0407 - learning_rate: 0.0010 Epoch 92/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0410 - val_loss: 0.0037 - val_mae: 0.0404 - learning_rate: 0.0010 Epoch 93/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0033 - mae: 0.0403 - val_loss: 0.0037 - val_mae: 0.0413 - learning_rate: 0.0010 Epoch 94/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0413 - val_loss: 0.0037 - val_mae: 0.0396 - learning_rate: 0.0010 Epoch 95/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0405 - val_loss: 0.0034 - val_mae: 0.0401 - learning_rate: 0.0010 Epoch 96/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0035 - mae: 0.0411 - val_loss: 0.0031 - val_mae: 0.0387 - learning_rate: 0.0010 Epoch 97/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0031 - mae: 0.0389 - val_loss: 0.0037 - val_mae: 0.0394 - learning_rate: 0.0010 Epoch 98/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0034 - mae: 0.0406 - val_loss: 0.0031 - val_mae: 0.0378 - learning_rate: 0.0010 Epoch 99/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0031 - mae: 0.0399 - val_loss: 0.0034 - val_mae: 0.0390 - learning_rate: 0.0010 Epoch 100/100 122/122 ━━━━━━━━━━━━━━━━━━━━ 2s 19ms/step - loss: 0.0030 - mae: 0.0384 - val_loss: 0.0031 - val_mae: 0.0378 - learning_rate: 0.0010
52/52 ━━━━━━━━━━━━━━━━━━━━ 1s 8ms/step
residuals = y_test - y_pred.flatten()
# Plot histogram of residuals
plt.figure(figsize=(10, 6))
plt.hist(residuals, bins=30, color='purple', alpha=0.7)
plt.title('Histogram of Residuals')
plt.xlabel('Residual')
plt.ylabel('Frequency')
plt.show()
r2 = r2_score(y_test, y_pred)
print(f'R-squared: {r2}')
R-squared: 0.9043033169059241 [LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM][LibSVM]